<a href="https://colab.research.google.com/github/ajmtrz/proyecto_etfs_eu/blob/main/Copia_de_Proyecto_1_Hack_A_Boss.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

---

# Proyecto 1: Hack a Boss
## Análisis del Top 10 ETFs

Proyecto de análisis del Top 10 de ETFs.

### Documentación del Proyecto

1. [Presentación Paula](https://docs.google.com/presentation/d/1Wk4tVhK89EP7b4iiIhvcTk4n5uHJkxgnuADb6Dk_Nto/edit?usp=sharing)

### Páginas de datos fundamentales
1. [MorningStar](https://www.morningstar.es/es/)
2. [Dataroma](https://www.dataroma.com/m/home.php)

---





# Google Drive
## Esta celda monta **Google Drive** para que podamos guardar los archivos csv y cualquier otro archivo que se vaya generando.
### IMPORTANTE (Se crea y se monta la carpeta *Bootcamp_Proyecto1* en **vuestro** Google Drive)

In [None]:
import os
from google.colab import drive

# Mount the root of the user's Google Drive (remounting if already mounted)
drive.mount('/content/drive', force_remount=True)
# Working folder inside Drive
carpeta_trabajo = '/content/drive/My Drive/Bootcamp_Proyecto1/archivos'
# Create the folder when missing; exist_ok=True already tolerates an existing
# folder, so a separate os.path.exists check is unnecessary
os.makedirs(carpeta_trabajo, exist_ok=True)
# Make it the current working directory
os.chdir(carpeta_trabajo)
# Sanity check: report the directory the kernel is now in
print(f"El sistema se encuentra en {os.getcwd()}")

# Importación de **librerías**
### Incluid aquí las librerías a instalar e importar

In [None]:
# Instalar las librerías
!pip install yfinance

In [2]:
# Importar las librerías
import time
from io import StringIO
from pprint import pprint

import requests
import numpy as np
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
import seaborn as sns
from bs4 import BeautifulSoup

# Sección Antonio Jesús


1. Extracción de datos vía API
2. Datos sobre el riesgo y rentabilidad
3. Portafolio de inversión de los ETFs
4. Distribución de porcentajes de sectores





In [3]:
# ETF ticker symbols passed to the download and scraping helpers
tickers = [
    "VXUS", "SCHD", "SPGP", "FNDF", "VOT",
    "VCIT", "FLOT", "MUB", "AVUV", "ESGS",
]

In [6]:
def down_price_vol(tickers:list) -> pd.DataFrame:
    '''
    Download the full price and volume history of a list of symbols and
    return their period-over-period percentage changes.

    Parameters:
    tickers (list): Ticker symbols to download.

    Returns:
    DataFrame with two columns per ticker, '{ticker}.price' (change in
    'Adj Close') and '{ticker}.volume' (change in 'Volume'), ordered as in
    `tickers`. Values are fractional changes as produced by
    DataFrame.pct_change (0.01 means +1%). Rows with a missing value in any
    requested column are discarded before the changes are computed.

    Example:
    >>> df_price_vol = down_price_vol(['MSFT'])
    '''
    # auto_adjust=False is requested explicitly: the 'Adj Close' column only
    # exists in unadjusted downloads, so relying on the library default breaks
    # the selection below on yfinance releases that adjust prices by default
    raw = yf.download(tickers, period='max', auto_adjust=False)
    # Some yfinance releases return flat column labels for a single symbol;
    # rebuild the (field, ticker) MultiIndex so one code path serves both cases
    if not isinstance(raw.columns, pd.MultiIndex) and len(tickers) == 1:
        raw.columns = pd.MultiIndex.from_product([raw.columns, tickers])

    # Source field -> suffix used in the output column names
    renamed = {'Adj Close': 'price', 'Volume': 'volume'}
    # Select price and volume side by side for each ticker, in input order
    wanted = [(field, ticker) for ticker in tickers for field in renamed]

    # Drop incomplete rows, convert levels to changes, then drop the first
    # row, which has no predecessor.
    # NOTE: a zero volume followed by a non-zero one yields inf, which
    # dropna() does not remove.
    changes = raw[wanted].dropna().pct_change(1).dropna()
    changes.columns = [f'{ticker}.{renamed[field]}' for field, ticker in changes.columns]
    return changes

def web_scraping_sectors(tickers:list, timeout:float=10, pause:float=0.5) -> pd.DataFrame:
    '''
    Scrape the sector allocation of each ETF from its holdings page on the
    Spanish Yahoo Finance site.

    Parameters:
    tickers (list): Ticker symbols to look up.
    timeout (float): Seconds to wait for each HTTP response before requests
        raises a timeout error.
    pause (float): Seconds to sleep after each request.

    Returns:
    DataFrame with one column per ticker and one row per sector found. Cells
    hold the percentage text exactly as shown on the page (e.g. '8,00%');
    sectors not found for a ticker are NaN.

    Example:
    >>> df_sectores = web_scraping_sectors(['VXUS'])
    '''
    url_base = "https://es.finance.yahoo.com/quote/"
    # Browser-like User-Agent sent with every request
    headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
    # Sector labels as they appear on the Spanish page; other rows are ignored
    sectores_validos = {
        "Materiales básicos", "Acciones cíclicas", "Servicios financieros",
        "Propiedades inmobiliarias", "Acciones defensivas", "Atención sanitaria",
        "Utilidades", "Servicios de comunicación", "Energía", "Industriales",
        "Tecnología"
    }
    dict_sectores = {}
    for etf in tickers:
        url = f'{url_base}{etf}/holdings?p={etf}'
        # A timeout keeps one stalled request from hanging the whole loop
        response = requests.get(url, headers=headers, timeout=timeout)
        soup = BeautifulSoup(response.text, 'html.parser')
        sectores_etf = {}
        # Each matching div is one row of the page's allocation tables
        for item in soup.find_all('div', class_='Bdbw(1px) Bdbc($seperatorColor) Bdbs(s) H(25px) Pt(10px)'):
            nombre = item.find('span', class_='Mend(5px) Whs(nw)')
            valor = item.find('span', class_='W(20%) D(b) Fl(start) Ta(e)')
            # Rows missing either span are skipped instead of raising
            # AttributeError on None
            if nombre is None or valor is None:
                continue
            if nombre.text in sectores_validos:
                sectores_etf[nombre.text] = valor.text
        dict_sectores[etf] = sectores_etf
        # Pause between consecutive requests
        time.sleep(pause)
    # Outer keys (tickers) become columns, inner keys (sectors) become rows
    return pd.DataFrame.from_dict(dict_sectores)

In [7]:
# Percentage changes of adjusted close price and volume for every ticker
df_price_vol = down_price_vol(tickers)
# Sector allocation of each ETF, scraped from its Yahoo Finance holdings page
df_sect_dist = web_scraping_sectors(tickers)

[*********************100%%**********************]  10 of 10 completed


In [9]:
# Show the scraped sector weights (rows: sectors, columns: ETFs)
df_sect_dist

Unnamed: 0,VXUS,SCHD,SPGP,FNDF,VOT,VCIT,FLOT,MUB,AVUV,ESGS
Materiales básicos,"8,00%","1,93%","14,07%","10,68%","2,60%",,,,"7,63%","1,03%"
Acciones cíclicas,"11,54%","9,25%","7,25%","13,28%","8,84%",,,,"17,84%","6,51%"
Servicios financieros,"19,62%","15,14%","3,35%","18,02%","6,01%",,,,"25,13%","10,37%"
Propiedades inmobiliarias,"3,19%","0,00%","2,48%","1,44%","6,67%",,,,"1,00%","0,00%"
Acciones defensivas,"7,63%","12,80%","3,26%","7,84%","2,03%",,,,"3,07%","14,02%"
Atención sanitaria,"9,40%","16,64%","11,32%","6,12%","14,55%",,,,"1,72%","10,08%"
Utilidades,"3,02%","0,32%","0,00%","4,34%","0,60%",,,,"0,01%","7,63%"
Servicios de comunicación,"5,31%","4,50%","1,34%","5,54%","5,09%",,,,"1,88%","8,12%"
Energía,"5,85%","10,00%","27,09%","9,60%","6,68%",,,,"18,61%","15,40%"
Industriales,"14,61%","17,71%","10,75%","15,68%","16,72%",,,,"18,02%","8,82%"


# Sección Paula

In [None]:
tickers = ["VXUS", "SCHD", "SPGP", "FNDF", "VOT", "VCIT", "FLOT", "MUB", "AVUV", "ESGS"]

# Browser-like User-Agent sent with every request
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36"
}

# One DataFrame per HTML table found, each tagged with the ETF it came from
all_dataframes = []

for ticker in tickers:
    print(f"Fetching tables for {ticker}...")

    url = f"https://finance.yahoo.com/quote/{ticker}/holdings?p={ticker}"
    # A timeout keeps one stalled request from hanging the whole cell
    response = requests.get(url, headers=headers, timeout=10)

    try:
        # Literal HTML is wrapped in StringIO: passing the raw string to
        # read_html is deprecated in recent pandas releases
        tables = pd.read_html(StringIO(response.text))
    except ValueError:
        # read_html raises ValueError when the page has no table elements
        print(f"No tables found for {ticker}")
        continue

    # Add the 'Ticker' column so rows stay attributable after concatenation
    for table in tables:
        all_dataframes.append(table.assign(Ticker=ticker))

# Concatenate; pd.concat raises on an empty list, so fall back to an empty
# frame when no table was scraped for any ticker
merged_df = pd.concat(all_dataframes, ignore_index=True) if all_dataframes else pd.DataFrame()

display(merged_df)


Fetching tables for VXUS...
Fetching tables for SCHD...
Fetching tables for SPGP...
Fetching tables for FNDF...
Fetching tables for VOT...
Fetching tables for VCIT...
No tables found for VCIT
Fetching tables for FLOT...
Fetching tables for MUB...
No tables found for MUB
Fetching tables for AVUV...
Fetching tables for ESGS...


Unnamed: 0,Name,Symbol,% Assets,Ticker
0,Taiwan Semiconductor Manufacturing Co Ltd,2330.TW,1.42%,VXUS
1,Nestle SA,NESN.SW,1.10%,VXUS
2,Novo Nordisk A/S Class B,NOVO-B.CO,1.03%,VXUS
3,Tencent Holdings Ltd,0700.HK,0.88%,VXUS
4,Samsung Electronics Co Ltd,005930.KS,0.87%,VXUS
...,...,...,...,...
66,The Home Depot Inc,HD,4.23%,ESGS
67,Comcast Corp Class A,CMCSA,3.69%,ESGS
68,Amgen Inc,AMGN,2.92%,ESGS
69,ConocoPhillips,COP,2.92%,ESGS


In [None]:
# Quote page of a single fund (VFSUX); every HTML table on it is displayed
url = "https://finance.yahoo.com/quote/VFSUX?p=VFSUX"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36"
}

# A timeout keeps a stalled request from hanging the cell
response = requests.get(url, headers=headers, timeout=10)

try:
    # Literal HTML is wrapped in StringIO: passing the raw string to
    # read_html is deprecated in recent pandas releases
    tables = pd.read_html(StringIO(response.text))
except ValueError:
    # read_html raises ValueError when the page has no table elements
    print("No tables found on the page.")
else:
    for i, table in enumerate(tables):
        print(f"Table {i}")
        display(table)


Table 0


Unnamed: 0,0,1
0,Previous Close,9.91
1,YTD Return,2.20%
2,Expense Ratio (net),0.20%
3,Category,Short-Term Bond
4,Last Cap Gain,-8.00
5,Morningstar Rating,★★★★★
6,Morningstar Risk Rating,Average
7,Sustainability Rating,


Table 1


Unnamed: 0,0,1
0,Net Assets,55.35B
1,Beta (5Y Monthly),0.48
2,Yield,2.92%
3,5y Average Return,
4,Holdings Turnover,77.00%
5,Last Dividend,0.23
6,Average for Category,
7,Inception Date,"Oct 29, 1982"


# Sección Karlos

# Sección Josep

In [None]:
# NOTE(review): `df` is not assigned in any cell visible in this notebook, so
# on a fresh kernel this raises NameError — confirm which frame was intended
# (possibly df_price_vol) and load or reference it explicitly.
df


In [None]:
# Summary statistics of `df` via DataFrame.describe().
# NOTE(review): `df` is not assigned in any cell visible in this notebook —
# confirm which frame was intended before relying on this output.
df_describe = df.describe()
df_describe
