In [14]:
import sys
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
# Add the parent directory to the module search path so the `src.zinfo`
# imports below resolve. The path is relative, so this only works when the
# kernel's working directory is one level below the directory containing `src`.
sys.path.append("..")

# news scraping
from src.zinfo.news_scraper import NewsScraper

# clustering
from src.zinfo.article_clustering import get_vectorized_titles
from src.zinfo.article_clustering import get_num_clusters_per_val
from src.zinfo.article_clustering import cluster_articles

# article selecting
from src.zinfo.article_selector import get_best_article_all_clusters

# Date (ISO format, YYYY-MM-DD) on which the algorithm officially started running.
# NOTE(review): not referenced by any cell visible in this part of the notebook.
start_date = "2021-08-13"

In [16]:
# Load the complete article table and the selected-articles table from disk.
all_news, selected_news = (
    pd.read_csv(csv_path)
    for csv_path in ("../data/all_news.csv", "../data/selected_articles.csv")
)

# Number of selected articles per source, as a plain dict.
# value_counts() skips rows whose source is missing.
sources = selected_news["source"].value_counts().to_dict()

In [18]:
# Display the article table read from ../data/all_news.csv
# (pandas elides the middle rows of a long frame).
all_news

Unnamed: 0,date,title,url,source,topic
0,2021-08-12T20:00:24Z,Open Post: Hosted By Brendan Fraser Getting Em...,https://dlisted.com/2021/08/12/open-post-hoste...,,Brendan Fraser
1,2021-08-12T14:45:00Z,"Sometimes, the Internet is Good: Brendan Frase...",https://www.pajiba.com/web_culture/sometimes-t...,,Brendan Fraser
2,2021-08-12T17:30:38Z,10 Brilliant Martin Scorsese Movies to Buy on ...,https://www.indiewire.com/shop/best-martin-sco...,,Brendan Fraser
3,2021-08-12T20:29:00Z,Watch: Brendan Fraser Gets Emotional in Respon...,https://comicbook.com/movies/news/brendan-fras...,,Brendan Fraser
4,2021-08-12T21:05:00Z,GreenPower Reports Fiscal First Quarter 2022 F...,https://finance.yahoo.com/news/greenpower-repo...,,Brendan Fraser
...,...,...,...,...,...
12006,2021-08-26T07:11:34Z,AP News Digest 3:10 am,https://www.independent.co.uk/news/world/ameri...,Independent,Nirvana Nevermind
12007,2021-08-26T14:20:33Z,Porn star Ron Jeremy indicted on more than 30 ...,https://globalnews.ca/news/8142376/ron-jeremy-...,Global News,Nirvana Nevermind
12008,2021-08-26T09:13:16Z,Glory Days! Remembering the finest gigs Cork's...,https://www.echolive.ie/entertainment/whatson/...,Echo Live,Nirvana Nevermind
12009,2021-08-26T11:10:40Z,"Baby from Nirvana album cover now suing band, ...",https://abc7ny.com/baby-sues-nirvana-nevermind...,WABC-TV,Nirvana Nevermind


In [19]:
# Display the table read from ../data/selected_articles.csv
# (pandas elides the middle rows of a long frame).
selected_news

Unnamed: 0,date,title,url,source,topic,num_articles
0,2021-08-11,"'Hard Knocks' Power Rankings: Dak Prescott, Mi...",https://slashdot.org/firehose.pl?op=view&amp;i...,,Hard Knocks 2021,47
1,2021-08-11,NFL playoff teams that could miss postseason i...,https://www.cbssports.com/nfl/news/nfl-playoff...,,Hard Knocks 2021,4
2,2021-08-11,Chris Cuomo Defied CNN Ban On Advising His Sex...,http://www.womensystems.com/2021/08/chris-cuom...,,Kerry Kennedy,271
3,2021-08-11,Messi joins PSG: Lionel Messi talks about play...,https://www.insidesport.co/messi-joins-psg-lio...,,PSG,83
4,2021-08-11,Christina Applegate announces multiple scleros...,https://www.scotsman.com/health/christina-appl...,,Christina Applegate,4
...,...,...,...,...,...,...
268,2021-08-26,Monica Lewinsky insisted thong-flashing scene ...,https://www.dailymail.co.uk/femail/article-993...,Daily Mail,Monica Lewinsky,8
269,2021-08-26,Tropical Storm Ida forecast to pose major hurr...,https://www.axios.com/tropical-storm-ida-hurri...,Axios,Hurricane,9
270,2021-08-26,Man photographed as baby for Nirvana 'Nevermin...,https://www.wfaa.com/article/news/nation-world...,WFAA.com,Nirvana Nevermind,14
271,2021-08-26,Manchester City's Benjamin Mendy charged with ...,https://www.cbssports.com/soccer/news/manchest...,CBS Sports,Benjamin Mendy,35


In [20]:
# Show the per-source counts of selected articles, most frequent first
# (rows with a missing source were not counted by value_counts()).
sources

{'Yahoo Entertainment': 17,
 'Independent': 11,
 'USA Today': 10,
 'CBS Sports': 5,
 'Slashdot.org': 5,
 'The Daily Caller': 4,
 'Bleacher Report': 4,
 'CBC News': 3,
 'Business Insider': 3,
 'Fox News': 3,
 'Variety': 3,
 'ComicBook.com': 3,
 'Bloomberg': 3,
 'HYPEBEAST': 3,
 'TheWrap': 3,
 'CBS News': 3,
 'Stereogum': 3,
 'The Guardian': 3,
 'Daily Mail': 3,
 'NPR': 3,
 'Reuters': 2,
 'Thatsnerdalicious.com': 2,
 'The Boston Globe': 2,
 'Worldsoccertalk.com': 2,
 'HuffPost': 2,
 'GameSpot': 2,
 'EURACTIV': 2,
 'MMA Fighting': 2,
 'Newsweek': 2,
 'NBC News': 2,
 'Access': 2,
 'NBCSports.com': 2,
 'Independent.ie': 2,
 'The Hill': 2,
 'New York Post': 2,
 'ETCanada.com': 2,
 'CNA': 2,
 'U.S. Soccer': 2,
 'Billings Gazette': 2,
 'Eonline.com': 2,
 'The Week Magazine': 2,
 'Destructoid': 2,
 'Page Six': 2,
 'Associated Press': 2,
 'The Times of India': 2,
 'Al Jazeera English': 2,
 'Www.https': 2,
 'BBC News': 1,
 'The Weather Channel': 1,
 'The Washington Post': 1,
 '4029tv': 1,
 'The-s

## Tracking trending keywords over multiple days

In [63]:
# Keep a single row per (date, topic) pair so a topic counts at most once per day.
remove_dup_news = selected_news.drop_duplicates(subset=["date", "topic"])

# After de-duplication, a topic's row count equals the number of distinct
# dates it appears on; retain the topics present on at least two dates.
days_per_topic = remove_dup_news["topic"].value_counts()
topics = days_per_topic[days_per_topic >= 2].index.to_list()

# Restrict to those recurring topics and order the rows by topic name.
is_recurring = remove_dup_news["topic"].isin(topics)
long_trends = remove_dup_news.loc[is_recurring].sort_values(by="topic")

In [67]:
# Display the recurring topics indexed by (topic, date). set_index returns a
# new frame here, so `long_trends` itself keeps its original columns.
long_trends.set_index(["topic", "date"])

Unnamed: 0_level_0,Unnamed: 1_level_0,title,url,source,num_articles
topic,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
49ers,2021-08-23,Kyle Shanahan still won’t name 49ers’ starting...,https://ninerswire.usatoday.com/2021/08/22/49e...,USA Today,4
49ers,2021-08-15,Former Seahawks linebacker Mychal Kendricks to...,https://seahawkswire.usatoday.com/2021/08/15/f...,USA Today,4
Barcelona,2021-08-16,Soccer-Barca can escape financial hole in 18 m...,https://www.reuters.com/lifestyle/sports/socce...,Reuters,4
Barcelona,2021-08-21,Depay rescues draw for Barcelona in Bilbao,https://worldsoccertalk.com/2021/08/21/depay-r...,Worldsoccertalk.com,4
Bob Dylan,2021-08-17,Bob Dylan sued for alleged sexual abuse of minor,https://www.dw.com/en/bob-dylan-sued-for-alleg...,DW (English),5
Bob Dylan,2021-08-16,Bob Dylan sued for allegedly sexually abusing ...,https://pagesix.com/2021/08/16/bob-dylan-sued-...,Page Six,40
CM Punk,2021-08-26,CM Punk Teases Daniel Bryan During His AEW Dyn...,https://www.wrestlinginc.com/news/2021/08/cm-p...,Wrestling Inc.,3
CM Punk,2021-08-21,CM Punk Sells Free Ice Cream Bars to Fans Atte...,https://comicbook.com/wwe/news/aew-cm-punk-sel...,ComicBook.com,5
COVID booster shot,2021-08-18,What We Know About COVID-19 Booster Shots,http://www.thecut.com/2021/08/covid-19-booster...,The Cut,4
COVID booster shot,2021-08-17,US experts expected to recommend COVID-19 vacc...,https://www.wlky.com/article/us-experts-to-rec...,WLKY Louisville,4
