<a href="https://colab.research.google.com/github/paulandrepamm/proyecto_etfs_eu/blob/main/Copia_de_Proyecto_1_Hack_A_Boss.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

---

# Proyecto 1: Hack a Boss
## Análisis del Top 10 ETFs

Proyecto de análisis del Top 10 de ETFs.

### Documentación del Proyecto

1. [Presentación Paula](https://docs.google.com/presentation/d/1Wk4tVhK89EP7b4iiIhvcTk4n5uHJkxgnuADb6Dk_Nto/edit?usp=sharing)

### Páginas datos fundamentales
1. [MorningStar](https://www.morningstar.es/es/)
2. [Dataroma](https://www.dataroma.com/m/home.php)

---





# Google Drive
## Esta celda monta **Google Drive** para que podamos guardar los archivos csv y cualquier otro archivo que se vaya generando.
### IMPORTANTE (Se crea y se monta la carpeta *Bootcamp_Proyecto1* en **vuestro** Google Drive)

In [None]:
import os
from google.colab import drive

# Se monta la carpeta principal de tu Google Drive
drive.mount('/content/drive', force_remount=True)
# Se define la carpeta de trabajo
carpeta_trabajo = '/content/drive/My Drive/Bootcamp_Proyecto1/archivos'
# Se comprueba si existe, si no, se crea
if not os.path.exists(carpeta_trabajo):
    os.makedirs(carpeta_trabajo, exist_ok=True)
# Se establece la carpeta como directorio de trabajo
os.chdir(carpeta_trabajo)
# Comprobación
print(f"El sistema se encuentra en {os.getcwd()}")

# Importación de **librerías**
### Incluid aquí las librerías a instalar e importar

In [26]:
# Instalar las librerías
!pip install yfinance
!pip install yahooquery




In [3]:
# Importar las librerías
import time
import requests
import numpy as np
import yfinance as yf
import yahooquery as yq
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from bs4 import BeautifulSoup
from pprint import pprint

# Sección Antonio Jesús


1. Extracción de datos vía API
2. Datos sobre el riesgo y rentabilidad
3. Portafolio de inversión de los ETFs
4. Distribución de porcentajes de sectores





In [9]:
# Obtención de los símbolos del Top ETFs de EEUU
s = yq.Screener()
dict_query = s.get_screeners(['top_etfs_us'])
tickers = [symbol['symbol'] for symbol in dict_query['top_etfs_us']['quotes']]

In [None]:
def down_price_vol(tickers:list) -> pd.DataFrame:
    '''
    Esta función descarga los cambios porcentuales de las cotizaciones
    y del volumen de negociación de la lista de símbolos en un dataframe.

    Parámetros:
    tickers (list): La lista de símbolos.

    Retorna:
    DataFrame con los incrementos porcentuales de las cotizaciones

    Ejemplo:
    >>> df_price_vol = down_price_vol(['MSFT'])
    '''
    # Descarga de las cotizaciones
    df = yf.download(tickers, period='max')
    # Obtener los nombres de las columnas para 'Adj Close' y 'Volume'
    adj_close_cols = [('Adj Close', ticker) for ticker in tickers]
    volume_cols = [('Volume', ticker) for ticker in tickers]

    # Seleccionar las columnas de interés
    df = df[adj_close_cols + volume_cols]
    # Eliminar valores nulos y cambiar los precios por incrementos porcentuales
    df = df.dropna().pct_change(1).dropna()
    # Ordenar y formatear el las columnas
    df.columns = [f'{j}.{i}' for i, j in df.columns]
    columns = [col for etf in tickers for col in [f'{etf}.Adj Close', f'{etf}.Volume']]
    df = df[columns]
    df.columns = [col.replace('.Adj Close', '.price').replace('.Volume', '.volume') for col in df.columns]
    return df

def web_scraping_sectors(tickers:list) -> pd.DataFrame:
    '''
    Esta función hace web scraping a la página de Yahoo Finance
    para obtener la distribución en los sectores en los que está
    invertido el ETF

    Parámetros:
    tickers (list): La lista de símbolos.

    Retorna:
    DataFrame con los porcentajes de la distribución de sectores

    Ejemplo:
    >>> df_sectores = web_scraping_sectores()
    '''
    url_base = "https://es.finance.yahoo.com/quote/"
    lista_sectores = [
        "Materiales básicos", "Acciones cíclicas", "Servicios financieros",
        "Propiedades inmobiliarias", "Acciones defensivas", "Atención sanitaria",
        "Utilidades", "Servicios de comunicación", "Energía", "Industriales",
        "Tecnología"
    ]
    dict_sectores = {}
    for etf in tickers:
        url = f'{url_base}{etf}/holdings?p={etf}'
        response = requests.get(
            url,
            headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
            )
        soup = BeautifulSoup(response.text, 'html.parser')
        sectores_etf = {}
        lista_contenedores = soup.findAll('div', class_='Bdbw(1px) Bdbc($seperatorColor) Bdbs(s) H(25px) Pt(10px)')
        for item in lista_contenedores:
            sector = item.find('span', class_='Mend(5px) Whs(nw)').text
            if sector in lista_sectores:
                valor = item.find('span', class_='W(20%) D(b) Fl(start) Ta(e)').text
                sectores_etf[sector] = valor
        dict_sectores[etf] = sectores_etf
        time.sleep(0.5)
    # Retornar DataFrame
    return pd.DataFrame.from_dict(dict_sectores)

In [None]:
df_price_vol = down_price_vol(tickers)
df_sect_dist = web_scraping_sectors(tickers)

# Sección Paula

In [14]:
#Net assets table

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36"
}

net_assets = pd.DataFrame()

for ticker in tickers:

    url = f"https://finance.yahoo.com/quote/{ticker}?p={ticker}"
    response = requests.get(url, headers=headers)

    try:
        tables = pd.read_html(response.text)
        for table in tables:
            labels_to_remove = ["Previous Close", "Open", "Bid", "Ask", "Day's Range", "52 Week Range", "Volume","Avg. Volume"]
            table = table[~table.iloc[:, 0].isin(labels_to_remove)]

            data_series = pd.Series(dict(zip(table.iloc[:, 0], table.iloc[:, 1])))

            net_assets[ticker] = data_series
    except ValueError:
        print(f"No hay tablas para {ticker}")

net_assets = net_assets.T

display(net_assets)



  tables = pd.read_html(response.text)
  tables = pd.read_html(response.text)
  tables = pd.read_html(response.text)
  tables = pd.read_html(response.text)
  tables = pd.read_html(response.text)
  tables = pd.read_html(response.text)
  tables = pd.read_html(response.text)
  tables = pd.read_html(response.text)
  tables = pd.read_html(response.text)
  tables = pd.read_html(response.text)
  tables = pd.read_html(response.text)
  tables = pd.read_html(response.text)
  tables = pd.read_html(response.text)
  tables = pd.read_html(response.text)
  tables = pd.read_html(response.text)
  tables = pd.read_html(response.text)
  tables = pd.read_html(response.text)
  tables = pd.read_html(response.text)
  tables = pd.read_html(response.text)
  tables = pd.read_html(response.text)
  tables = pd.read_html(response.text)
  tables = pd.read_html(response.text)
  tables = pd.read_html(response.text)
  tables = pd.read_html(response.text)
  tables = pd.read_html(response.text)


Unnamed: 0,Net Assets,NAV,PE Ratio (TTM),Yield,YTD Daily Total Return,Beta (5Y Monthly),Expense Ratio (net),Inception Date
DXJS,51.73M,28.39,6.06,2.60%,33.69%,0.26,0.58%,2013-06-28
FLJH,249.66M,30.81,14.03,0.86%,31.21%,0.5,0.09%,2017-11-02
HEWJ,215.64M,34.13,14.06,0.59%,31.28%,0.54,0.50%,2014-01-31
DXJ,3.12B,85.57,9.73,2.81%,39.04%,0.5,0.48%,2006-06-16
DBJP,289.07M,58.7,15.16,5.15%,27.20%,0.53,0.47%,2011-06-09
PTH,132.12M,30.26,6.8,0.00%,-26.18%,0.74,0.60%,2006-10-12
XTL,56.78M,66.09,21.44,0.81%,-18.47%,1.06,0.35%,2011-01-26
XLRE,4.09B,33.11,25.36,3.89%,-8.13%,1.05,0.10%,2015-10-07
ICF,1.99B,48.83,27.0,3.10%,-9.10%,1.04,0.33%,2001-01-29
HSCZ,98.33M,26.89,11.49,1.39%,5.92%,0.72,0.42%,2015-06-29


In [11]:
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36"
}

all_dataframes = []

for ticker in tickers:
    print(f"Fetching tables for {ticker}...")

    url = f"https://finance.yahoo.com/quote/{ticker}/holdings?p={ticker}"
    response = requests.get(url, headers=headers)

    try:
        tables = pd.read_html(response.text)
        for table in tables:
            table['Ticker'] = ticker  # Add the 'Ticker' column
            all_dataframes.append(table)
    except ValueError:
        print(f"No tables found for {ticker}")

# Concatenate all dataframes
merged_df = pd.concat(all_dataframes, ignore_index=True)

display(merged_df)


Fetching tables for DXJS...


  tables = pd.read_html(response.text)


Fetching tables for FLJH...


  tables = pd.read_html(response.text)


Fetching tables for HEWJ...


  tables = pd.read_html(response.text)


Fetching tables for DXJ...


  tables = pd.read_html(response.text)


Fetching tables for DBJP...


  tables = pd.read_html(response.text)


Fetching tables for PTH...


  tables = pd.read_html(response.text)


Fetching tables for XTL...


  tables = pd.read_html(response.text)


Fetching tables for XLRE...


  tables = pd.read_html(response.text)


Fetching tables for ICF...


  tables = pd.read_html(response.text)


Fetching tables for HSCZ...


  tables = pd.read_html(response.text)


Fetching tables for XSD...


  tables = pd.read_html(response.text)


Fetching tables for IHI...


  tables = pd.read_html(response.text)


Fetching tables for PSCT...


  tables = pd.read_html(response.text)


Fetching tables for XAR...


  tables = pd.read_html(response.text)


Fetching tables for PPA...


  tables = pd.read_html(response.text)


Fetching tables for CIBR...


  tables = pd.read_html(response.text)


Fetching tables for CNRG...


  tables = pd.read_html(response.text)


Fetching tables for PSR...


  tables = pd.read_html(response.text)


Fetching tables for PTF...


  tables = pd.read_html(response.text)


Fetching tables for ITB...


  tables = pd.read_html(response.text)


Fetching tables for PXE...


  tables = pd.read_html(response.text)


Fetching tables for PFFA...


  tables = pd.read_html(response.text)


Fetching tables for EWJV...


  tables = pd.read_html(response.text)


Fetching tables for PHO...


  tables = pd.read_html(response.text)


Fetching tables for GSJY...


  tables = pd.read_html(response.text)


Unnamed: 0,Name,Symbol,% Assets,Ticker
0,Kobe Steel Ltd,5406.T,1.75%,DXJS
1,Cosmo Energy Holdings Co Ltd,5021.T,0.92%,DXJS
2,Tokyo Seimitsu Co Ltd,7729.T,0.69%,DXJS
3,Aozora Bank Ltd,8304.T,0.68%,DXJS
4,Seven Bank Ltd,8410.T,0.64%,DXJS
...,...,...,...,...
228,Sumitomo Mitsui Financial Group Inc,8316.T,1.67%,GSJY
229,Mitsui & Co Ltd,8031.T,1.66%,GSJY
230,Keyence Corp,6861.T,1.61%,GSJY
231,Takeda Pharmaceutical Co Ltd,4502.T,1.59%,GSJY


Collecting airtable-python-wrapper
  Downloading airtable_python_wrapper-0.15.3-py2.py3-none-any.whl (12 kB)
Installing collected packages: airtable-python-wrapper
Successfully installed airtable-python-wrapper-0.15.3


# Sección Karlos

# Sección Josep

In [None]:
df


In [None]:
df_describe = df.describe()
df_describe
