## Webscrapping


### Parte 1

In [2]:
pip install selenium pandas yfinance matplotlib seaborn beautifulsoup4

Note: you may need to restart the kernel to use updated packages.


In [3]:
import pandas as pd
import yfinance as yf
import numpy as np
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException

In [4]:
chrome_options = Options()
chrome_options.add_argument("--start-maximized")
chrome_options.add_argument("--lang=en-US")

driver = webdriver.Chrome(chrome_options)
print("Todo bien")

Todo bien


In [5]:
url = "https://finance.yahoo.com/markets/stocks/gainers"

driver.get(url)

stocks_data = []

table = "table.yf-7uw1qi.bd"

try:
    print("Esperando")
    WebDriverWait(driver, 10).until(
        EC.visibility_of_element_located(( By.CSS_SELECTOR, table))
    )
    print("Tabla encontrada")
except TimeoutException:
    print("La página aún no carga")
    driver.quit()

Esperando
Tabla encontrada


In [6]:
row_selector = "tr.row.yf-7uw1qi"

while True:
    rows = driver.find_elements(By.CSS_SELECTOR, row_selector)
    print(f"Se encontraron {len(rows)} filas en esta página")

    for row in rows:
        try:
            symbol = row.find_element(By.CSS_SELECTOR, "span.symbol.yf-90gdtp").text
            name = row.find_element(By.CSS_SELECTOR, "div.companyName").text

            stocks_data.append({
                "Simbolo": symbol,
                "Nombre": name
            })
            print(f"{symbol} | {name}")
        except Exception as e:
            print(f"Error en la fila: {e}")

    try:
        pasopagina = driver.find_element( By.XPATH, '/html/body/div[2]/main/section/section/section/section/section[1]/div/div[3]/div[3]/button[3]')
                
        if not pasopagina.is_enabled():
            print("Ya no hay más páginas, fin del scraping.")
            break

        pasopagina.click()
        time.sleep(5)

    except Exception as e:
        print("No se encontró el botón de siguiente. Fin del scraping.")
        break


Se encontraron 25 filas en esta página
AMBA | Ambarella, Inc.
IREN | IREN Limited
BABA | Alibaba Group Holding Limited
AFRM | Affirm Holdings, Inc.
DOOO | BRP Inc.
ADSK | Autodesk, Inc.
CIFR | Cipher Mining Inc.
SATS | EchoStar Corporation
S | SentinelOne, Inc.
EQX | Equinox Gold Corp.
NXE | NexGen Energy Ltd.
MIAX | Miami International Holdings, Inc.
NG | NovaGold Resources Inc.
PRVA | Privia Health Group, Inc.
SSRM | SSR Mining Inc.
HL | Hecla Mining Company
CELH | Celsius Holdings, Inc.
HMY | Harmony Gold Mining Company Limited
OS | OneStream, Inc.
CNXC | Concentrix Corporation
BIDU | Baidu, Inc.
CDE | Coeur Mining, Inc.
JOYY | JOYY Inc.
GH | Guardant Health, Inc.
BTDR | Bitdeer Technologies Group
Se encontraron 25 filas en esta página
COO | The Cooper Companies, Inc.
GFI | Gold Fields Limited
GSAT | Globalstar, Inc.
SBSW | Sibanye Stillwater Limited
IAG | IAMGOLD Corporation
SOUN | SoundHound AI, Inc.
HCC | Warrior Met Coal, Inc.
NGD | New Gold Inc.
BHC | Bausch Health Companies In

### Parte 2

In [7]:
symbols = [stock["Simbolo"] for stock in stocks_data]

historical_data = yf.download( tickers=symbols, period="1y", interval="1mo", auto_adjust= False)

adjclose = historical_data["Adj Close"]

print(adjclose.head())

[*********************100%***********************]  52 of 52 completed

Ticker            ADSK        AEM       AFRM       AMBA        BABA  \
Date                                                                  
2024-09-01  275.480011  79.566811  40.820000  56.410000  104.356186   
2024-10-01  283.799988  85.226173  43.849998  56.189999   96.351486   
2024-11-01  291.899994  83.379227  70.010002  71.550003   85.917831   
2024-12-01  295.570007  77.617378  60.900002  72.739998   83.380714   
2025-01-01  311.339996  92.235771  61.070000  76.720001   97.197182   

Ticker           BF-A       BF-B   BHC        BIDU       BILI  ...          S  \
Date                                                           ...              
2024-09-01  46.925602  48.019775  8.16  105.290001  23.379999  ...  23.920000   
2024-10-01  42.737293  43.180275  9.20   91.230003  22.120001  ...  25.790001   
2024-11-01  40.560219  41.267910  8.37   85.050003  19.170000  ...  27.950001   
2024-12-01  36.961182  37.247028  8.06   84.309998  18.110001  ...  22.200001   
2025-01-01  32.8




### Parte 3

In [8]:
f_6m = adjclose.iloc[:6]
l_6m = adjclose.iloc[6:]

cum_returns = (f_6m.iloc[-1] / f_6m.iloc[0]) - 1

top10_symbols = cum_returns.sort_values(ascending=False).head(10).index.tolist()
print("Top 10 seleccionadas:", top10_symbols)

monthly_returns = adjclose[top10_symbols].pct_change(fill_method=None).dropna()

l6_returns = monthly_returns.iloc[6:]

print("\nRetornos mensuales (últimos 6 meses):")
print(l6_returns)

weights = np.repeat(1/10, 10) 

portfolio_returns = l6_returns.dot(weights)

print("\nRendimiento mensual del portafolio (últimos 6 meses):")
print(portfolio_returns)

portfolio_returns.to_csv("salida/portfolio_returns.csv")

Top 10 seleccionadas: ['SOUN', 'GH', 'SSRM', 'BTDR', 'AFRM', 'UPWK', 'PRVA', 'JOYY', 'SATS', 'BABA']

Retornos mensuales (últimos 6 meses):
Ticker          SOUN        GH      SSRM      BTDR      AFRM      UPWK  \
Date                                                                     
2025-04-01  0.144089  0.108685  0.060818  0.083805  0.101129  0.007663   
2025-05-01  0.088267 -0.139953  0.111842  0.343783  0.043006  0.177947   
2025-06-01  0.061325  0.281142  0.076923 -0.107309  0.332177 -0.132343   
2025-07-01 -0.037279 -0.212529 -0.062009  0.122822 -0.008389 -0.110119   
2025-08-01  0.260407  0.645193  0.615900  0.108611  0.290105  0.286789   

Ticker          PRVA      JOYY      SATS      BABA  
Date                                                
2025-04-01  0.045880 -0.019533 -0.121188 -0.096801  
2025-05-01 -0.030664  0.190772 -0.211299 -0.046806  
2025-06-01  0.010545  0.063283  0.562324 -0.003777  
2025-07-01 -0.151304  0.005009  0.176534  0.081641  
2025-08-01  0.180328  0