In [12]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import time
from IPython.display import display, HTML

pd.set_option('display.max_rows', None)

def extraer_noticias_finviz(tick="TSLA") -> pd.DataFrame:
    """
    Extrae las noticias de un ticker de Finviz.
    Args:
        tick (str): Ticker de la acción a consultar.
    Returns:
        pd.DataFrame: DataFrame con las noticias, con fecha, título y URL.
    """

    url = f"https://finviz.com/quote.ashx?t={tick}&p=d"
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        )
    }

    response = requests.get(url, headers=headers)

    if response.status_code != 200:
        raise Exception(f"Error al obtener la página: {response.status_code}")

    soup = BeautifulSoup(response.text, "html.parser")
    news_table = soup.find("table", class_="fullview-news-outer")

    noticias = []

    if news_table:
        rows = news_table.find_all("tr")
        for row in rows:
            cols = row.find_all("td")
            if len(cols) < 2:
                continue

            fecha = cols[0].text.strip()
            link_tag = cols[1].find("a")
            titulo = link_tag.text.strip() if link_tag else ""
            url = link_tag['href'] if link_tag and 'href' in link_tag.attrs else ""

            noticias.append({"fecha": fecha, "título": titulo, "url": url})

    df = pd.DataFrame(noticias)

    # Para los links que empiezan con '/news/', agregar el dominio finviz.com
    df['url'] = np.where(
        df['url'].str.startswith('/news/'),
        'https://finviz.com' + df['url'],
        df['url']
    )

    return df









def nombre_bonito(url):
    # Pone bonito el dominio de la URL, es para saber cuantas veces se repite un dominio en el DataFrame de noticias
    if "https://www." in url:
        url = url.replace("https://www.", "")
    elif "https://finance." in url:
        url = url.replace("https://finance.", "")
    elif "https://finviz.com/" in url:
        url = "finviz"
    elif "https://chainstoreage." in url:
        url = url.replace("https://chainstoreage.", "chainstoreage.")

    elif "https://seekingalpha." in url:
        url = url.replace("https://seekingalpha.", "seekingalpha.")

    elif "https://qz." in url:
        url = url.replace("https://qz.", "qz.")

    return url.split(".")[0]








def hiperreferenciar_titulos(df: pd.DataFrame) -> pd.DataFrame:
    """
    Agrega hiperenlaces a los títulos de las noticias en el DataFrame.
    Args:
        df (pd.DataFrame): DataFrame con las noticias.
    Returns:
        pd.DataFrame: DataFrame con los títulos convertidos en hiperenlaces.
    """

    if 'url' not in df.columns or 'título' not in df.columns:
        raise ValueError(
            "El DataFrame debe contener las columnas 'url' y 'título'.")

    df['título'] = df.apply(
        lambda row: f'<a href="{row["url"]}" target="_blank">{row["título"]}</a>' if row["url"] else row["título"],
        axis=1
    )

    return df







# Metodo
def web_scrap(tickers: list) -> dict:
    """
    Obtiene los datos de noticias para una lista de tickers de acciones.
    Utiliza la función `extraer_noticias_finviz` para cada ticker y maneja errores de conexión.
    Esta función espera 10 segundos entre cada solicitud para evitar el error 429 (Too Many Requests).
    Args:
        tickers (list): Lista de tickers de acciones a consultar.
    Returns:
        dict: Diccionario donde las claves son los tickers y los valores son DataFrames con las noticias.
    Raises:
        Exception: Si ocurre un error al extraer las noticias para un ticker, se espera 10 segundos y se reintenta.
    """

    dfs = {}
    for tick in tickers:
        try:
            dfs[tick] = extraer_noticias_finviz(tick)
            print(f"{tick} ya!")
        except Exception as e:
            print(f"Tenemos que esperar un poco más para {tick}: {e}")
            time.sleep(10) 
            dfs[tick] = extraer_noticias_finviz(tick)
            print(f"{tick} ya!")

    return dfs





# Metodo
def cuantas_veces(dfs: dict, tick: str, index = False) -> pd.DataFrame:
    """
    Cuenta cuántas veces aparece cada journal en las noticias de un ticker y lo muestra como DataFrame.
    Args:
        dfs (dict): Diccionario de DataFrames de noticias por ticker.
        tick (str): Ticker de la acción a consultar.
    index (bool): Si es True, muestra el índice del DataFrame. Por defecto es False.
    Returns:
        pd.DataFrame: DataFrame con el conteo de cada journal.
    """

    df = dfs[tick]
    conteo = df["url"].apply(nombre_bonito).value_counts().reset_index()
    conteo.columns = ['journal', 'veces']

    display(HTML(f'<h2 style="text-align:left;">Últimas 100 noticias de {tick}</h2>'))

    if index:
        display(conteo.style.set_properties(subset=['journal'], **{'text-align': 'left'})
               .set_properties(subset=['veces'], **{'text-align': 'right'})
               .set_table_styles([{'selector': 'th', 'props': [('text-align', 'center')]}]))
    else:
        display(conteo.style.hide(axis="index"))
    
    return None



# Metodo
def cuantas_veces_todos(dfs: dict, index = False) -> None:
    for tick, df in dfs.items():
        cuantas_veces(dfs, tick, index=index)






def agrupar_journals(df: pd.DataFrame, journals: list) -> None:
    """
    Agrega los datos de noticias de diferentes journals al DataFrame total.
    Args:
        dfs (pd.DataFrame): DataFrame con las noticias.
        journals (list): Lista de nombres de journals a filtrar.
    """
    
    # Generalizar para cualquier lista de journals
    dftotal = pd.DataFrame(columns=df.columns)
    fila_vacia = pd.DataFrame([{col: "" for col in df.columns}])

    for journal in journals:
        # Filtrar por journal
        df_journal = df[df['url'].str.contains(journal, case=False, na=False)]
        # Fila con el nombre del journal
        fila_nombre = fila_vacia.copy()
        fila_vacia['fecha'] = "---"
        fila_nombre['título'] = journal
        # Concatenar: nombre, noticias, fila vacía
        dftotal = pd.concat([dftotal, fila_vacia, fila_nombre, df_journal], ignore_index=True)
    
    return dftotal



def highlight_fila_nombre(row, journals):
    if row['título'] in journals:
        return ['font-weight: bold'] * len(row)
    else:
        return [''] * len(row)
    




def quiero_ver(tick: str, journals: list, dfs: dict, index = False) -> None:
    """
    Muestra las noticias de un ticker específico y los journals seleccionados.
    Args:
        tick (str): Ticker de la acción a consultar.
        journals (list): Lista de nombres de journals a filtrar.
        dfs (dict): Diccionario de DataFrames de noticias por ticker.
        index (bool): Si es True, muestra el índice del DataFrame. Por defecto es False.
    Returns:
        None: Muestra las noticias en un formato HTML estilizado.
    Raises:
        ValueError: Si el ticker no se encuentra en el diccionario de DataFrames.
    """
    dftotal = agrupar_journals(dfs[tick], journals)
    dftotal = hiperreferenciar_titulos(dftotal)
    display(HTML(f'<h2 style="text-align:left;">Noticias de {tick}</h2>'))

    if index:
        display(
            dftotal.drop(columns=['url'])
            .style
            .apply(lambda row: highlight_fila_nombre(row, journals), axis=1)
            .set_properties(subset=['fecha'], **{'text-align': 'right'})
            .set_properties(subset=['título'], **{'text-align': 'left'})
            .set_table_styles([{'selector': 'th', 'props': [('text-align', 'center')]}])
        )
    else:
        display(
            dftotal.drop(columns=['url'])
            .style
            .hide(axis="index")
            .apply(lambda row: highlight_fila_nombre(row, journals), axis=1)
            .set_properties(subset=['fecha'], **{'text-align': 'right'})
            .set_properties(subset=['título'], **{'text-align': 'left'})
            .set_table_styles([{'selector': 'th', 'props': [('text-align', 'center')]}])
        )
    
    return None

def quiero_ver_todos(journals, dfs, index = False) -> None:
    """    Muestra las noticias de todos los tickers para los journals seleccionados.
    """
    for tick, df in dfs.items():
        quiero_ver(tick, journals, dfs, index=index)







In [13]:
tickers = [
    "TSLA", "NKE", "MSFT", "NVDA", "AAPL", "AMZN", "GOOG", "GOOGL", "META",
]

In [14]:
dfs = web_scrap(tickers)

TSLA ya!
NKE ya!
MSFT ya!
NVDA ya!
AAPL ya!
AMZN ya!
GOOG ya!
GOOGL ya!
META ya!


# **Para un ticker**

In [15]:
cuantas_veces(dfs, "TSLA")

journal,veces
yahoo,57
finviz,20
barrons,13
investors,3
marketwatch,2
youtube,2
wsj,1
aboveavalon,1
digitimes,1


In [16]:
cuantas_veces(dfs, "TSLA", index=1)

Unnamed: 0,journal,veces
0,yahoo,57
1,finviz,20
2,barrons,13
3,investors,3
4,marketwatch,2
5,youtube,2
6,wsj,1
7,aboveavalon,1
8,digitimes,1


In [17]:
journals = ["wsj", "barrons"]

quiero_ver("TSLA", journals, dfs, index=1)

Unnamed: 0,fecha,título
0,---,
1,,wsj
2,Dec-11-25 05:11PM,Rivian Expands Hands-Free Driving in AI Push
3,---,
4,---,barrons
5,05:08PM,"Rivian Went Full Tesla, With One Exception. The Stock Dropped."
6,08:19AM,Tesla Stock Slips. Analysts Are Split on Full-Self Driving Progress.
7,03:53PM,Powell Gave Stock Markets the Present it Wanted. It Could Giftwrap the Santa Claus Rally.
8,07:45AM,Elon Musk Appears to Confirm SpaceX IPO. Here's What Could Happen Next.
9,07:14AM,A SpaceX IPO Could Happen Soon. Here's What Elon Musk Says.


In [18]:
quiero_ver("TSLA", journals, dfs)

fecha,título
---,
,wsj
Dec-11-25 05:11PM,Rivian Expands Hands-Free Driving in AI Push
---,
---,barrons
05:08PM,"Rivian Went Full Tesla, With One Exception. The Stock Dropped."
08:19AM,Tesla Stock Slips. Analysts Are Split on Full-Self Driving Progress.
03:53PM,Powell Gave Stock Markets the Present it Wanted. It Could Giftwrap the Santa Claus Rally.
07:45AM,Elon Musk Appears to Confirm SpaceX IPO. Here's What Could Happen Next.
07:14AM,A SpaceX IPO Could Happen Soon. Here's What Elon Musk Says.


# **Para todos los tickers**

In [19]:
cuantas_veces_todos(dfs)

journal,veces
yahoo,57
finviz,20
barrons,13
investors,3
marketwatch,2
youtube,2
wsj,1
aboveavalon,1
digitimes,1


journal,veces
yahoo,55
finviz,34
wsj,3
businesswire,3
marketwatch,2
barrons,2
youtube,1


journal,veces
yahoo,60
finviz,25
barrons,8
marketwatch,3
youtube,2
digitimes,1
investors,1


journal,veces
yahoo,51
finviz,31
barrons,8
marketwatch,3
digitimes,3
wsj,2
youtube,1
investors,1


journal,veces
yahoo,60
finviz,19
barrons,6
youtube,5
aboveavalon,4
digitimes,3
investors,1
wsj,1
marketwatch,1


journal,veces
yahoo,62
finviz,26
barrons,7
marketwatch,3
investors,1
digitimes,1


journal,veces
yahoo,71
finviz,19
youtube,5
marketwatch,3
digitimes,1
qz,1


journal,veces
yahoo,58
finviz,26
marketwatch,6
youtube,5
barrons,3
digitimes,1
qz,1


journal,veces
yahoo,73
finviz,14
barrons,5
marketwatch,4
youtube,3
investors,1


In [20]:
quiero_ver_todos(journals, dfs)

fecha,título
---,
,wsj
Dec-11-25 05:11PM,Rivian Expands Hands-Free Driving in AI Push
---,
---,barrons
05:08PM,"Rivian Went Full Tesla, With One Exception. The Stock Dropped."
08:19AM,Tesla Stock Slips. Analysts Are Split on Full-Self Driving Progress.
03:53PM,Powell Gave Stock Markets the Present it Wanted. It Could Giftwrap the Santa Claus Rally.
07:45AM,Elon Musk Appears to Confirm SpaceX IPO. Here's What Could Happen Next.
07:14AM,A SpaceX IPO Could Happen Soon. Here's What Elon Musk Says.


fecha,título
---,
,wsj
09:37AM,"Stocks to Watch Wednesday: Royal Bank of Canada, CrowdStrike, Dollar Tree"
Dec-02-25 05:59PM,Nike Shakes Up Leadership Team as Turnaround Plan Continues
12:00PM,Nike CEO on Company's Reinvention Plan
---,
---,barrons
11:35AM,What Stocks Al Gore's Investment Firm Bought and Sold This Quarter
02:00AM,On Stock Doesn't Have a Tariff Problem After All


fecha,título
---,
,wsj
---,
---,barrons
07:22AM,Oracle and the Fed Were Expected to Provide Wall Street With Answers. Only One Delivered.
02:00AM,"Microsoft, Broadcom, Lilly, and Other Stocks That Meet This Pro's Definition of 'Quality'"
04:46PM,"The 3 Best Tech Stocks to Buy for 2026, According to Our Columnist"
09:30AM,Small-Cap Stocks May Be Ready to Outperform After Five Years of Lagging
07:35AM,"Marvell Stock Falls on Microsoft, Amazon Fears. 'Block Out the Noise.'"
02:30AM,Oracle Earnings Face Pressing Questions About OpenAI and Profit Margins


fecha,título
---,
,wsj
11:18AM,"Oracle, Adobe, Broadcom: AI-Related Earnings in Spotlight"
09:38AM,"Stocks to Watch Thursday: Oracle, Broadcom, Gemini Space Station"
---,
---,barrons
10:05AM,The AI Trade Is Tanking as Anxieties Reemerge
07:32AM,Nvidia Stock Falls. 2 Reasons Oracle's Earnings Were Bad for the Chip Maker.
07:22AM,Oracle and the Fed Were Expected to Provide Wall Street With Answers. Only One Delivered.
06:27AM,Buy Stocks on the Dip? Here's a Better Strategy.


fecha,título
---,
,wsj
02:23PM,Apple's Chips Chief Says He Isn't Leaving
---,
---,barrons
02:50PM,Look Past the S&P 500's Market Cap and Focus on Revenue Instead
03:48PM,2 Reasons Tech Stocks Are Rise Past AI Worries
02:00PM,2 Reasons Meta and Other Tech Stocks Can Rise Past AI Worries
12:52PM,Tech Stocks Are Rebounding. Two Reasons It Can Continue.
08:17AM,Walmart Stock Now Trades on the Nasdaq. Its Tech Transformation Is Real.


fecha,título
---,
,wsj
---,
---,barrons
02:50PM,Look Past the S&P 500's Market Cap and Focus on Revenue Instead
01:24PM,Roku Stock Is a Top Pick for 2026 at 2 Wall Street Firms. Here's Why.
12:30AM,Can Costco's Earnings Report Silence Skeptics? What Could Revive the Stock.
06:09PM,Amazon Expands Same-Day Grocery Delivery. Rival Stocks Are Falling.
03:32PM,It's a Big Day for Cisco Stock. It's on Track for Highest Close Since Dot-Com Bubble Burst.
09:30AM,Small-Cap Stocks May Be Ready to Outperform After Five Years of Lagging


fecha,título
---,
,wsj
---,
---,barrons


fecha,título
---,
,wsj
---,
---,barrons
08:19AM,Tesla Stock Slips. Analysts Are Split on Full-Self Driving Progress.
07:46AM,"IonQ, D-Wave, and Rigetti Stocks Are a Buy, Say Analysts. How the Quantum Pure Plays Stack Up."
07:22AM,Oracle and the Fed Were Expected to Provide Wall Street With Answers. Only One Delivered.


fecha,título
---,
,wsj
---,
---,barrons
12:53PM,Meta's AI Strategy Is Stuck in No Man's Land. So Is the Stock.
09:30AM,Small-Cap Stocks May Be Ready to Outperform After Five Years of Lagging
08:40AM,A Key Moment for AI Stocks Comes This Week-and It Has Nothing to Do With Nvidia
03:48PM,2 Reasons Tech Stocks Are Rise Past AI Worries
02:00PM,2 Reasons Meta and Other Tech Stocks Can Rise Past AI Worries
