## Filter Articles For Each Company

In [None]:
import libs
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from IPython.display import display 

import yfinance as yf
import duckdb

import io
from urllib.request import urlopen
import zipfile
import os

from menuinst.platforms.win_utils.knownfolders import folder_path
from sipbuild.generator.parser.tokens import keywords
pd.set_option('display.max_rows', 5)

In [None]:
conn = duckdb.connect(database='../../eda-ddb/eda-gdelt.ddb', read_only=True, config= {'access_mode': 'READ_ONLY'} )

In [None]:
stocks_to_keywords_broad = {
    "ADM": ["agriculture", "commodities", "grain", "processing", "sustainability"],
    "JNJ": ["healthcare", "pharmaceuticals", "medical-devices", "consumer-health", "vaccines"],
    "NEM": ["mining", "gold", "precious-metals", "exploration", "sustainability"],
    "V": ["payments", "credit-cards", "digital-payments", "transactions", "financial-services"],
    "PG": ["consumer-goods", "hygiene", "household-products", "personal-care", "brands"],
    "ABBV": ["biopharmaceuticals", "immunology", "oncology", "healthcare"],
    "CVX": ["oil", "energy", "natural-gas", "petroleum", "sustainability"],
    "PEP": ["pepsi", "beverages", "snacks", "food-products"],
    "T": ["telecommunications", "wireless", "internet", "broadband", "5g", "internet-services"],
    "VZ": ["telecommunications", "wireless", "5g", "broadband", "internet", "internet-services" ],
    "AAPL": ["iphone", "ipad", "ios", "tim-cook", "icloud"],
    "CTVA": ["agriculture", "biotechnology", "seeds", "crop-protection", "sustainability"],
    "XOM": ["exxon-mobil", "oil", "energy", "petroleum", "natural-gas", "exploration"],
    "JPM": ["banking", "investment", "financial-services", "wealth-management", "loans"],
    "DE": ["agriculture", "machinery", "construction", "equipment", "sustainability"],
    "COP": ["oil", "energy", "exploration", "natural-gas", "sustainability"],
    "MA": ["payments", "credit-cards", "financial-services", "transactions", "digital-payments"],
    "KO": ["sprite", "fanta", "schweppes", "powerade", "beverages", "soft-drinks"],
    "MSFT": ["software", "cloud", "windows", "satya-nadella"],
    "DOW": ["dow", "chemicals", "materials", "plastics", "manufacturing", "sustainability"],
    "FCX": ["mining", "copper", "gold", "exploration", "sustainability"],
    "NVDA": ["gpu", "artificial-intelligence", "graphics", "semiconductors"],
    "BP": ["british-petroleum", "oil", "energy", "petroleum", "natural-gas", "renewable-energy"],
    "PFE": ["pharmaceuticals", "vaccines", "biopharmaceuticals", "healthcare"],
    "TMUS": ["telecommunications", "wireless", "5g", "broadband", "mobile"]
}

stocks_to_keywords_specific = {
    "ADM": ["agribusiness", "grain-processing", "sustainable-agriculture", "commodity-markets", "food-supply-chain"],
    "JNJ": ["healthcare-innovation", "pharmaceutical-research", "medical-devices", "consumer-health-products", "vaccine-development"],
    "NEM": ["gold-mining", "precious-metals-investment", "mining-operations", "sustainability-in-mining", "gold-exploration"],
    "V": ["digital-payments", "credit-card-services", "financial-technology", "payment-processing", "transaction-security"],
    "PG": ["consumer-goods", "hygiene-products", "personal-care-brands", "household-brands", "product-innovation"],
    "ABBV": ["biopharmaceuticals", "immunology-research", "oncology-treatments", "healthcare-solutions"],
    "CVX": ["oil-and-gas", "energy-sector", "petroleum-production", "sustainable-energy", "natural-gas-exploration"],
    "PEP": ["beverages-industry", "snack-foods", "consumer-products", "sustainability-in-food", "brand-marketing"],
    "T": ["telecommunications-services", "wireless-network", "broadband-internet", "entertainment-services"],
    "VZ": ["5g-technology", "telecommunications-network", "internet-services", "wireless-solutions"],
    "AAPL": ["iphone-development", "ios-software", "consumer-electronics", "tech-innovation"],
    "CTVA": ["agricultural-biotechnology", "crop-seeds", "sustainable-agriculture", "agricultural-innovation"],
    "XOM": ["oil-industry", "energy-resources", "petroleum-markets", "natural-gas-production"],
    "JPM": ["banking-industry", "investment-banking", "financial-services-innovation", "wealth-management-strategies"],
    "DE": ["deere", "agriculture-machinery", "construction-equipment", "sustainable-agriculture", "farm-technology"],
    "COP": ["oil-and-gas-exploration", "energy-production", "natural-gas-resources", "sustainability-in-energy"],
    "MA": ["digital-payments", "financial-services", "credit-card-industry", "payment-processing-technology"],
    "KO": ["coke", "cola","sprite", "fanta", "schweppes", "powerade", "beverages-market", "soft-drink-industry", "brand-marketing", "consumer-goods"],
    "MSFT": ["cloud-computing", "software-development", "windows-platform", "technology-leadership"],
    "DOW": ["dow", "chemical-manufacturing", "materials-science", "plastics-production", "sustainable-materials"],
    "FCX": ["copper-mining", "gold-mining", "mineral-resources", "sustainability-in-mining"],
    "NVDA": ["gpu-technology", "artificial-intelligence", "graphics-processing", "semiconductor-industry"],
    "BP": ["british-petroleum", "energy-sector", "petroleum-industry", "natural-gas-production", "renewable-energy-solutions"],
    "PFE": ["pharmaceuticals-research", "vaccine-innovation", "biopharmaceuticals", "healthcare-development"],
    "TMUS": ["telecommunications-industry", "5g-network", "mobile-services", "broadband-solutions"]
}

In [None]:

stocks_to_keywords_blacklist = {
    "ADM": [],
    "JNJ": ["boris"],
    "NEM": [],
    "V": ["citizenship", "passport", "trump"],
    "PG": [],
    "ABBV": [],
    "CVX": [],
    "PEP": [],
    "T": [],
    "VZ": [],
    "AAPL": [],
    "CTVA": [],
    "XOM": [],
    "JPM": [],
    "DE": [],
    "COP": [],
    "MA": [],
    "KO": [],
    "MSFT": [],
    "DOW": ["down", "shutdown"],
    "FCX": [],
    "NVDA": [],
    "BP": [],
    "PFE": [],
    "TMUS": []
}


In [None]:
stocks_to_company_names = {
    "ADM" : ["archer-daniels" ,"archer-daniels-midland", ],
    "JNJ" : ["johnson-and-johnson", "johnson-johnson"],
    "NEM" : ["newmont-corporation"],
    "V" : ["visa"],
    "PG" : ["procter-and-gamble"],
    "ABBV" : ["abbvie"],
    "CVX" : ["chevron"],
    "PEP" : ["pepsico", "pepsi"],
    "T" : ["att", "at-t"],
    "VZ" : ["verizon"],
    "AAPL" : ["apple"],
    "CTVA" : ["corteva"],
    "XOM" : ["exxon-mobil", "exxon"],
    "JPM" : ["jpmorgan-chase"],
    "DE" : ["deere", "john-deere"],
    "COP" : ["conocophillips"],
    "MA" : ["mastercard"],
    "KO" : ["coca-cola", "coke", "cola", "sprite", "fanta", "schweppes", "powerade"],
    "MSFT" : ["microsoft", "azure"],
    "DOW" : ["dow-jones", "dow"],
    "FCX" : ["freeport-mcmoran"],
    "NVDA" : ["nvidia", "geforce"],
    "BP" : ["bp", "british-petroleum"],
    "PFE" : ["pfizer"],
    "TMUS" : ["t-mobile", "tmobile"],
}

In [None]:
sources = [
    # Major U.S. News
    'nytimes.com',               # The New York Times
    'washingtonpost.com',         # The Washington Post
    'bbc.com',                    # BBC News
    'reuters.com',                # Reuters
    'apnews.com',                 # Associated Press (AP)
    'npr.org',                    # National Public Radio (NPR)
    'politico.com',               # Politico
    'pbs.org',                    # PBS News
    'propublica.org',             # ProPublica
    'fivethirtyeight.com',        # FiveThirtyEight
    'cnn.com',                    # CNN
    'foxnews.com',                # Fox News
    'msnbc.com',                  # MSNBC
    'abcnews.go.com',             # ABC News
    'cbsnews.com',                # CBS News
    'usatoday.com',               # USA Today
    'latimes.com',                # Los Angeles Times
    'bloomberg.com',              # Bloomberg
    'wsj.com',                    # The Wall Street Journal
    'forbes.com',                 # Forbes
    'time.com',                   # TIME
    'newsweek.com',               # Newsweek
    'huffpost.com',               # HuffPost
    'vox.com',                    # Vox
    'axios.com',                  # Axios
    'buzzfeednews.com',           # BuzzFeed News

    # Major International News
    'theguardian.com',            # The Guardian (UK)
    'thetimes.co.uk',             # The Times (UK)
    'telegraph.co.uk',            # The Telegraph (UK)
    'independent.co.uk',          # The Independent (UK)
    'ft.com',                     # Financial Times (UK)
    'the-sun.com',                # The Sun (UK)
    'lemonde.fr',                 # Le Monde (France)
    'lefigaro.fr',                # Le Figaro (France)
    'dw.com',                     # Deutsche Welle (Germany)
    'spiegel.de',                 # Der Spiegel (Germany)
    'aljazeera.com',              # Al Jazeera (Qatar)
    'rt.com',                     # Russia Today (Russia)
    'haaretz.com',                # Haaretz (Israel)
    'timesofisrael.com',          # The Times of Israel (Israel)
    'straitstimes.com',           # The Straits Times (Singapore)
    'chinadaily.com.cn',          # China Daily (China)
    'japantimes.co.jp',           # The Japan Times (Japan)
    'abc.net.au',                 # ABC News (Australia)
    'smh.com.au',                 # The Sydney Morning Herald (Australia)
    'thestar.com',                # Toronto Star (Canada)
    'cbc.ca',                     # CBC News (Canada)
    'globalnews.ca',              # Global News (Canada)
    'elpais.com',                 # El País (Spain)
    'elmundo.es',                 # El Mundo (Spain)
    'clarin.com',                 # Clarín (Argentina)
    'folha.uol.com.br',           # Folha de S.Paulo (Brazil)
    'nation.co.ke',               # Daily Nation (Kenya)

    # Financial & Business
    'cnbc.com',                   # CNBC
    'marketwatch.com',            # MarketWatch
    'businessinsider.com',        # Business Insider
    'economist.com',              # The Economist
    'barrons.com',                # Barron’s
    'ft.com',                     # Financial Times

    # Tech & Science
    'wired.com',                  # WIRED
    'techcrunch.com',             # TechCrunch
    'mashable.com',               # Mashable
    'theverge.com',               # The Verge
    'arstechnica.com',            # Ars Technica
    'gizmodo.com',                # Gizmodo
    'cnet.com',                   # CNET
    'scientificamerican.com',     # Scientific American
    'nature.com',                 # Nature
    'newscientist.com',           # New Scientist

    # Specialized Journalism & Investigative Reporting
    'propublica.org',             # ProPublica
    'theintercept.com',           # The Intercept
    'democracynow.org',           # Democracy Now!
    'motherjones.com',            # Mother Jones
    'rollingstone.com',           # Rolling Stone
    'slate.com',                  # Slate
    'jacobinmag.com',             # Jacobin
    'newyorker.com',              # The New Yorker
    'vanityfair.com',             # Vanity Fair

    # Regional U.S. News
    'chicagotribune.com',         # Chicago Tribune
    'dallasnews.com',             # The Dallas Morning News
    'miamiherald.com',            # Miami Herald
    'boston.com',                 # Boston Globe
    'sfchronicle.com',            # San Francisco Chronicle
    'philly.com',                 # The Philadelphia Inquirer
    'startribune.com',            # Star Tribune (Minneapolis)
    'azcentral.com',              # The Arizona Republic

    # Latin America
    'eltiempo.com',               # El Tiempo (Colombia)
    'lanacion.com.ar',            # La Nación (Argentina)
    'elcomercio.pe',              # El Comercio (Peru)
    'eluniversal.com.mx',         # El Universal (Mexico)

    # Africa & Middle East
    'mg.co.za',                   # Mail & Guardian (South Africa)
    'punchng.com',                # The Punch (Nigeria)
    'gulfnews.com',               # Gulf News (UAE)
    'arabnews.com',               # Arab News (Saudi Arabia)
    'dailystar.com.lb',           # The Daily Star (Lebanon)
]

In [None]:
def generate_sql_query(search_terms, blacklist_terms,  news_sources, fast=True):
    
    if fast :
        search_conditions = " OR ".join([
            f"SOURCEURL LIKE '%{term}%'"
            for term in search_terms
        ])
    else:
        search_conditions = " OR ".join([
            f"SOURCEURL LIKE '%-{term}-%' OR "
            f"SOURCEURL LIKE '%-{term}' OR "
            f"SOURCEURL LIKE '%-{term}/%' OR "
            f"SOURCEURL LIKE '%/{term}-%' OR "
            f"SOURCEURL LIKE '%/{term}%'"
            for term in search_terms
        ])

    news_source_conditions = " OR ".join([f"SOURCEURL LIKE '%{source}%'\n" for source in news_sources])
    blacklist_conditions = " OR ".join([f"SOURCEURL LIKE '%{term}%'" for term in blacklist_terms])
    
    query = f"""
    SELECT Day, FractionDate, GoldsteinScale, AvgTone, NumArticles, NumMentions, SOURCEURL
    FROM gdelt
    WHERE ({search_conditions})
    AND ({news_source_conditions})
    """
    
    if blacklist_terms != list():
        query += f"AND NOT ({blacklist_conditions})"

    query += ";"
    return query

In [None]:
print(generate_sql_query(["terms"], ["blacklist"], ["news"], fast=False))

In [None]:

def filter_gdelt(keywords, blacklist_words, folder_name):
    print(f"In Progress!: {folder_name}/{stock}_{(stocks_to_company_names[stock])[0]}.csv")
   
    q = generate_sql_query( keywords, blacklist_words, sources, fast=False)
    # print(q)
    
    df: pd.DataFrame = conn.query( q ).execute().fetchdf()
    df = df.drop_duplicates(subset=['SOURCEURL'])
    df['Date'] = pd.to_datetime(df['Day'], format='%Y%m%d')
    
    df.drop(columns=['Day', 'FractionDate'], inplace=True)
    display(df)
    df.to_csv(f"{folder_name}/{stock}_{(stocks_to_company_names[stock])[0]}.csv", index=False)

    print(f"Done!: {folder_name}/{stock}_{(stocks_to_company_names[stock])[0]}.csv", len(df))



In [None]:
folder = ["stocks_to_keywords_specific", "stocks_to_keywords_broad", "stocks_to_company_names"]

for i,(stock_to_words) in enumerate([ stocks_to_keywords_specific, stocks_to_keywords_broad, list() ]):
    for stock in stock_to_words.keys():
        filter_gdelt(stock_to_words[stock] + stocks_to_company_names[stock], stocks_to_keywords_blacklist[stock], folder[i])
        

In [None]:
folder = ["stocks_to_keywords_broad", "stocks_to_company_names"]

for i, (stock_to_words) in enumerate([stocks_to_keywords_broad, list()]):
    for stock in stock_to_words.keys():
        filter_gdelt(stock_to_words[stock] + stocks_to_company_names[stock], stocks_to_keywords_blacklist[stock], folder[i])

In [15]:
folder = [ "stocks_to_company_names"]

for i, (stock_to_words) in enumerate([stocks_to_company_names]):
    for stock in stock_to_words.keys():
        filter_gdelt( stocks_to_company_names[stock], stocks_to_keywords_blacklist[stock], folder[i])

In Progress!: stocks_to_company_names/ADM_archer-daniels.csv


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Unnamed: 0,GoldsteinScale,AvgTone,NumArticles,NumMentions,SOURCEURL,Date
0,6.4,-4.011598,12.0,12.0,https://www.reuters.com/article/us-archer-dani...,2019-02-05
1,0.0,-3.045685,6.0,6.0,https://www.marketwatch.com/story/archer-danie...,2019-03-25
...,...,...,...,...,...,...
121,0.0,1.404853,10.0,10.0,https://www.tickerreport.com/banking-finance/1...,2024-08-27
122,7.4,1.897810,10.0,10.0,https://www.tickerreport.com/banking-finance/1...,2024-08-29


Done!: stocks_to_company_names/ADM_archer-daniels.csv 80
In Progress!: stocks_to_company_names/JNJ_johnson-and-johnson.csv


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Unnamed: 0,GoldsteinScale,AvgTone,NumArticles,NumMentions,SOURCEURL,Date
0,0.0,-4.055304,160.0,160.0,https://in.reuters.com/article/us-johnson-john...,2019-01-01
2,-0.3,-2.905569,1.0,1.0,https://www.foxnews.com/politics/dem-rep-johns...,2019-01-02
...,...,...,...,...,...,...
130084,0.0,-0.903614,2.0,2.0,https://www.independent.co.uk/news/uk/politics...,2024-10-15
130092,3.0,-2.081448,2.0,2.0,https://www.foxnews.com/media/speaker-johnson-...,2024-10-15


Done!: stocks_to_company_names/JNJ_johnson-and-johnson.csv 21821
In Progress!: stocks_to_company_names/NEM_newmont-corporation.csv


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Unnamed: 0,GoldsteinScale,AvgTone,NumArticles,NumMentions,SOURCEURL,Date
0,4.0,0.677416,6.0,6.0,https://www.marketwatch.com/press-release/agni...,2020-09-30
3,6.0,4.621849,10.0,10.0,https://www.forbes.com/sites/greatspeculations...,2022-04-21
4,3.0,-1.085271,10.0,10.0,https://www.cbc.ca/news/canada/north/yukon-new...,2022-04-27
14,-4.0,-2.088167,5.0,5.0,https://www.forbes.com/sites/greatspeculations...,2022-06-03


Done!: stocks_to_company_names/NEM_newmont-corporation.csv 4
In Progress!: stocks_to_company_names/V_visa.csv


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Unnamed: 0,GoldsteinScale,AvgTone,NumArticles,NumMentions,SOURCEURL,Date
0,-4.0,-4.186047,10.0,10.0,https://punchng.com/thousands-to-miss-visa-app...,2019-01-01
6,4.0,5.949657,10.0,10.0,https://gulfnews.com/uae/government/long-term-...,2019-01-01
...,...,...,...,...,...,...
24180,3.2,2.588556,2.0,2.0,https://gulfnews.com/living-in-uae/visa-immigr...,2024-10-14
24184,3.0,1.832461,10.0,10.0,https://gulfnews.com/uae/expat-residents-of-gc...,2024-10-14


Done!: stocks_to_company_names/V_visa.csv 3902
In Progress!: stocks_to_company_names/PG_procter-and-gamble.csv


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Unnamed: 0,GoldsteinScale,AvgTone,NumArticles,NumMentions,SOURCEURL,Date
0,0.0,0.819672,10.0,10.0,https://www.cnbc.com/2019/07/30/procter-and-ga...,2019-07-30
1,-2.0,-0.805987,7.0,7.0,https://www.usatoday.com/story/money/2019/09/1...,2019-09-12
7,0.0,0.551724,10.0,10.0,https://www.marketwatch.com/press-release/glob...,2019-09-20
9,-2.0,1.271433,20.0,20.0,https://www.marketwatch.com/press-release/hand...,2020-02-07


Done!: stocks_to_company_names/PG_procter-and-gamble.csv 4
In Progress!: stocks_to_company_names/ABBV_abbvie.csv


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Unnamed: 0,GoldsteinScale,AvgTone,NumArticles,NumMentions,SOURCEURL,Date
0,-2.0,-5.021368,20.0,20.0,https://www.reuters.com/article/abbvie-stemcen...,2019-01-04
1,-2.0,-4.103645,40.0,40.0,https://www.reuters.com/article/us-abbvie-stem...,2019-01-05
...,...,...,...,...,...,...
245,7.4,0.998752,10.0,10.0,https://www.tickerreport.com/banking-finance/1...,2024-06-22
246,0.0,1.433692,10.0,10.0,https://www.tickerreport.com/banking-finance/1...,2024-07-04


Done!: stocks_to_company_names/ABBV_abbvie.csv 107
In Progress!: stocks_to_company_names/CVX_chevron.csv


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Unnamed: 0,GoldsteinScale,AvgTone,NumArticles,NumMentions,SOURCEURL,Date
0,0.0,0.846271,30.0,30.0,https://www.tickerreport.com/banking-finance/4...,2019-01-14
2,0.4,-0.577367,10.0,10.0,https://www.forbes.com/sites/gauravsharma/2019...,2019-01-16
...,...,...,...,...,...,...
3664,7.0,-3.314917,10.0,10.0,https://www.theguardian.com/environment/2024/s...,2024-09-30
3665,0.4,-0.510204,2.0,2.0,https://www.cnbc.com/2024/09/30/ftc-clears-che...,2024-09-30


Done!: stocks_to_company_names/CVX_chevron.csv 950
In Progress!: stocks_to_company_names/PEP_pepsico.csv


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Unnamed: 0,GoldsteinScale,AvgTone,NumArticles,NumMentions,SOURCEURL,Date
0,3.0,2.004454,20.0,20.0,https://www.usatoday.com/story/money/2019/01/0...,2019-01-03
1,-2.0,0.722851,30.0,30.0,https://www.cnet.com/news/this-little-pepsico-...,2019-01-04
...,...,...,...,...,...,...
860,-9.2,5.115090,10.0,10.0,https://www.vendingmarketwatch.com/management/...,2024-10-03
861,2.8,0.771208,4.0,4.0,https://www.forbes.com/sites/chloesorvino/2024...,2024-10-11


Done!: stocks_to_company_names/PEP_pepsico.csv 277
In Progress!: stocks_to_company_names/T_att.csv


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Unnamed: 0,GoldsteinScale,AvgTone,NumArticles,NumMentions,SOURCEURL,Date
0,-10.0,-12.153880,60.0,60.0,https://abcnews.go.com/International/wireStory...,2019-01-02
8,3.4,-4.295403,2.0,2.0,https://www.forbes.com/sites/andyjsemotiuk/201...,2019-01-02
...,...,...,...,...,...,...
17798,-10.0,-5.641026,3.0,3.0,https://www.barrons.com/news/attack-damages-sh...,2024-10-10
17802,-10.0,-5.241379,2.0,2.0,https://www.theguardian.com/world/2024/oct/14/...,2024-10-14


Done!: stocks_to_company_names/T_att.csv 3294
In Progress!: stocks_to_company_names/VZ_verizon.csv


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Unnamed: 0,GoldsteinScale,AvgTone,NumArticles,NumMentions,SOURCEURL,Date
0,4.0,5.465587,15.0,15.0,https://www.marketwatch.com/press-release/tegn...,2019-01-03
4,10.0,-0.239234,10.0,10.0,https://www.cnbc.com/2019/01/07/top-technician...,2019-01-07
...,...,...,...,...,...,...
2122,7.0,3.259259,6.0,6.0,https://www.forbes.com/sites/randybean/2024/10...,2024-10-14
2127,-2.0,-4.705882,10.0,10.0,https://www.theverge.com/2024/10/14/24270432/v...,2024-10-15


Done!: stocks_to_company_names/VZ_verizon.csv 759
In Progress!: stocks_to_company_names/AAPL_apple.csv


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Unnamed: 0,GoldsteinScale,AvgTone,NumArticles,NumMentions,SOURCEURL,Date
0,-4.0,-2.511748,120.0,120.0,https://www.businessinsider.com.au/netflix-sto...,2019-01-02
1,3.0,0.135719,10.0,10.0,https://www.marketwatch.com/press-release/lett...,2019-01-02
...,...,...,...,...,...,...
30310,7.0,-1.011378,4.0,4.0,https://www.newsweek.com/apples-iphone-hits-sa...,2024-10-15
30315,6.0,-1.290323,10.0,10.0,https://time.com/7093536/surgeons-apple-vision...,2024-10-15


Done!: stocks_to_company_names/AAPL_apple.csv 9697
In Progress!: stocks_to_company_names/CTVA_corteva.csv


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Unnamed: 0,GoldsteinScale,AvgTone,NumArticles,NumMentions,SOURCEURL,Date
0,-2.0,1.693405,4.0,4.0,https://www.marketwatch.com/press-release/cort...,2019-02-21
1,7.0,-0.668449,2.0,2.0,https://www.newsweek.com/humane-society-petiti...,2019-03-14
...,...,...,...,...,...,...
80,0.0,1.903553,10.0,10.0,https://www.tickerreport.com/banking-finance/1...,2024-09-30
82,0.0,2.411168,10.0,10.0,https://www.tickerreport.com/banking-finance/1...,2024-10-10


Done!: stocks_to_company_names/CTVA_corteva.csv 35
In Progress!: stocks_to_company_names/XOM_exxon-mobil.csv


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Unnamed: 0,GoldsteinScale,AvgTone,NumArticles,NumMentions,SOURCEURL,Date
0,-4.0,-1.404557,65.0,65.0,https://uk.reuters.com/article/uk-usa-court-ex...,2019-01-07
2,0.4,-0.437318,5.0,5.0,https://www.cnbc.com/2018/12/19/reuters-americ...,2019-01-07
...,...,...,...,...,...,...
4842,0.0,2.020202,10.0,10.0,https://www.tickerreport.com/banking-finance/1...,2024-10-10
4843,7.0,0.175439,10.0,16.0,https://www.cbc.ca/news/canada/newfoundland-la...,2024-10-10


Done!: stocks_to_company_names/XOM_exxon-mobil.csv 1251
In Progress!: stocks_to_company_names/JPM_jpmorgan-chase.csv


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Unnamed: 0,GoldsteinScale,AvgTone,NumArticles,NumMentions,SOURCEURL,Date
0,7.4,0.630472,10.0,10.0,https://www.marketwatch.com/press-release/new-...,2019-01-15
6,7.4,1.185262,20.0,20.0,https://www.tickerreport.com/banking-finance/4...,2019-01-23
...,...,...,...,...,...,...
724,0.0,2.837684,5.0,5.0,https://www.tickerreport.com/banking-finance/1...,2024-10-05
725,0.0,2.210884,10.0,10.0,https://www.tickerreport.com/banking-finance/1...,2024-10-12


Done!: stocks_to_company_names/JPM_jpmorgan-chase.csv 274
In Progress!: stocks_to_company_names/DE_deere.csv


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Unnamed: 0,GoldsteinScale,AvgTone,NumArticles,NumMentions,SOURCEURL,Date
0,5.2,-2.932099,4.0,4.0,https://www.washingtonpost.com/business/kubota...,2019-01-22
1,1.9,2.500000,20.0,20.0,http://precision.agwired.com/2019/01/28/develo...,2019-01-28
...,...,...,...,...,...,...
712,0.0,-2.121613,2.0,2.0,https://www.techdirt.com/2024/10/11/john-deere...,2024-10-11
713,0.0,-1.978417,1.0,1.0,https://www.techdirt.com/tag/john-deere-tractors/,2024-10-11


Done!: stocks_to_company_names/DE_deere.csv 201
In Progress!: stocks_to_company_names/COP_conocophillips.csv


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Unnamed: 0,GoldsteinScale,AvgTone,NumArticles,NumMentions,SOURCEURL,Date
0,3.4,-1.287177,10.0,10.0,https://www.marketwatch.com/press-release/cono...,2019-02-15
1,-4.4,-3.185214,115.0,115.0,http://streetwisereport.com/stocks-with-profit...,2019-02-25
...,...,...,...,...,...,...
364,0.0,2.196532,4.0,4.0,https://www.tickerreport.com/banking-finance/1...,2024-10-03
366,0.0,1.648352,10.0,10.0,https://www.tickerreport.com/banking-finance/1...,2024-10-05


Done!: stocks_to_company_names/COP_conocophillips.csv 124
In Progress!: stocks_to_company_names/MA_mastercard.csv


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Unnamed: 0,GoldsteinScale,AvgTone,NumArticles,NumMentions,SOURCEURL,Date
0,0.0,1.582734,6.0,6.0,https://www.vox.com/the-goods/2019/1/7/1817257...,2019-01-08
7,3.4,0.606713,17.0,24.0,https://gulfnews.com/technology/media/no-words...,2019-01-08
...,...,...,...,...,...,...
1572,0.0,-0.910846,20.0,20.0,https://www.cnbc.com/2024/10/01/mastercard-to-...,2024-10-01
1574,0.0,3.456985,20.0,20.0,https://www.tickerreport.com/banking-finance/1...,2024-10-05


Done!: stocks_to_company_names/MA_mastercard.csv 571
In Progress!: stocks_to_company_names/KO_coca-cola.csv


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Unnamed: 0,GoldsteinScale,AvgTone,NumArticles,NumMentions,SOURCEURL,Date
0,5.0,-0.245098,40.0,40.0,https://uk.reuters.com/article/uk-whitbread-m-...,2019-01-03
1,1.9,1.164144,10.0,10.0,https://www.forbes.com/sites/michaelgoldstein/...,2019-01-06
...,...,...,...,...,...,...
2586,7.4,2.039723,70.0,70.0,https://www.tickerreport.com/banking-finance/1...,2024-10-10
2588,0.0,2.245089,10.0,10.0,https://www.tickerreport.com/banking-finance/1...,2024-10-10


Done!: stocks_to_company_names/KO_coca-cola.csv 722
In Progress!: stocks_to_company_names/MSFT_microsoft.csv


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Unnamed: 0,GoldsteinScale,AvgTone,NumArticles,NumMentions,SOURCEURL,Date
0,7.0,0.217391,10.0,10.0,https://www.forbes.com/sites/amitchowdhry/2018...,2018-12-25
1,7.0,1.489682,30.0,30.0,https://www.theverge.com/2019/1/2/18164916/mic...,2019-01-02
...,...,...,...,...,...,...
19369,-2.0,-5.902192,1.0,1.0,https://www.newsweek.com/russia-china-using-cy...,2024-10-15
19373,-2.0,-1.564723,2.0,2.0,https://mashable.com/article/microsoft-ai-chat...,2024-10-15


Done!: stocks_to_company_names/MSFT_microsoft.csv 5941
In Progress!: stocks_to_company_names/DOW_dow-jones.csv


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Unnamed: 0,GoldsteinScale,AvgTone,NumArticles,NumMentions,SOURCEURL,Date
0,10.0,-0.239234,10.0,10.0,https://www.cnbc.com/2019/01/07/top-technician...,2019-01-07
4,-5.0,-2.081165,6.0,6.0,https://www.nytimes.com/2019/01/12/opinion/sun...,2019-01-12
...,...,...,...,...,...,...
1293,-5.0,-5.154639,10.0,10.0,https://www.marketwatch.com/livecoverage/stock...,2024-09-24
1294,3.4,2.645503,10.0,10.0,https://www.marketwatch.com/livecoverage/stock...,2024-10-01


Done!: stocks_to_company_names/DOW_dow-jones.csv 330
In Progress!: stocks_to_company_names/FCX_freeport-mcmoran.csv


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Unnamed: 0,GoldsteinScale,AvgTone,NumArticles,NumMentions,SOURCEURL,Date
0,-5.0,-0.108578,6.0,6.0,https://www.reuters.com/article/us-freeport-mc...,2018-12-25
2,5.0,0.000000,1.0,1.0,https://www.wsj.com/articles/stocks-to-watch-b...,2019-03-18
...,...,...,...,...,...,...
149,7.4,2.455869,30.0,30.0,https://www.tickerreport.com/banking-finance/1...,2024-09-24
150,7.4,2.328289,10.0,10.0,https://www.tickerreport.com/banking-finance/1...,2024-09-29


Done!: stocks_to_company_names/FCX_freeport-mcmoran.csv 77
In Progress!: stocks_to_company_names/NVDA_nvidia.csv


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Unnamed: 0,GoldsteinScale,AvgTone,NumArticles,NumMentions,SOURCEURL,Date
0,0.4,0.520833,10.0,10.0,https://www.theverge.com/2019/1/7/18170951/nvi...,2019-01-07
2,4.0,1.723606,40.0,40.0,https://techreport.com/news/34369/nvidia-expan...,2019-01-08
...,...,...,...,...,...,...
3372,3.4,-2.010050,3.0,3.0,https://www.forbes.com/sites/dereksaul/2024/10...,2024-10-15
3374,0.0,-2.390438,2.0,2.0,https://markets.businessinsider.com/news/stock...,2024-10-15


Done!: stocks_to_company_names/NVDA_nvidia.csv 1270
In Progress!: stocks_to_company_names/BP_bp.csv


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Unnamed: 0,GoldsteinScale,AvgTone,NumArticles,NumMentions,SOURCEURL,Date
0,0.0,1.383399,10.0,10.0,https://www.tickerreport.com/banking-finance/4...,2019-01-05
1,4.0,0.909091,10.0,10.0,https://uk.reuters.com/article/bp-trinidadtoba...,2019-01-08
...,...,...,...,...,...,...
3807,-2.0,1.323282,16.0,16.0,https://www.telegraph.co.uk/business/2024/10/0...,2024-10-07
3808,6.0,-2.978723,8.0,8.0,https://www.telegraph.co.uk/business/2024/10/0...,2024-10-02


Done!: stocks_to_company_names/BP_bp.csv 966
In Progress!: stocks_to_company_names/PFE_pfizer.csv


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Unnamed: 0,GoldsteinScale,AvgTone,NumArticles,NumMentions,SOURCEURL,Date
0,2.8,1.012658,5.0,5.0,https://www.cnbc.com/2019/01/28/pfizer-the-wor...,2019-01-28
3,-2.0,2.095423,50.0,50.0,https://in.reuters.com/article/us-usa-healthca...,2019-02-06
...,...,...,...,...,...,...
17974,0.0,2.213301,20.0,20.0,https://www.tickerreport.com/banking-finance/1...,2024-10-10
17975,-0.1,-0.816309,30.0,30.0,https://www.cnbc.com/2024/10/10/former-pfizer-...,2024-10-10


Done!: stocks_to_company_names/PFE_pfizer.csv 4170
In Progress!: stocks_to_company_names/TMUS_t-mobile.csv


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Unnamed: 0,GoldsteinScale,AvgTone,NumArticles,NumMentions,SOURCEURL,Date
0,-5.0,-8.895706,5.0,5.0,https://www.washingtonpost.com/local/thieves-c...,2019-01-05
5,0.0,0.717949,10.0,10.0,https://www.washingtonpost.com/technology/2019...,2018-01-08
...,...,...,...,...,...,...
1874,-5.0,-1.405152,10.0,10.0,https://mashable.com/article/hurricane-milton-...,2024-10-09
1876,0.0,0.000000,8.0,8.0,https://www.techdirt.com/2024/10/10/u-s-cellul...,2024-10-10


Done!: stocks_to_company_names/TMUS_t-mobile.csv 670


In [16]:
conn.close()