In [348]:
import yfinance as yf
from bs4 import BeautifulSoup
import pandas as pd
import requests
import os
from sqlalchemy import create_engine
from datetime import datetime
import time


### 1. WebScraping de los bancos que pertenecen a la bolsa de valores de estados unidos

In [359]:
urlBankStocks = 'https://finance.yahoo.com/u/yahoo-finance/watchlists/bank-and-financial-services-stocks/'
headersRequest = {
    'User-Agent': 'Safari'
}

response = requests.get(urlBankStocks, headers=headersRequest)
response.raise_for_status() 

soup = BeautifulSoup(response.content, 'html.parser')
section = soup.find('section', {'data-test': 'cwl-symbols'})
table = section.find('table')

In [360]:
headers = [th.get_text() for th in table.find_all('th')]
rows = []
for tr in table.find_all('tr')[1:]: 
    cells = tr.find_all('td')
    row = [cell.get_text(strip=True) for cell in cells]
    rows.append(row)

df = pd.DataFrame(rows, columns=headers)
df.sample(10)

Unnamed: 0,Symbol,Company Name,Last Price,Change,% Change,Market Time,Volume,Avg Vol (3 month),Market Cap
16,USB,U.S. Bancorp,49.16,-0.81,-1.62%,4:00 PM EDT,9.62M,7.95M,76.71B
23,TFC,Truist Financial Corporation,43.67,0.72,+1.68%,4:00 PM EDT,6.45M,7.84M,58.48B
6,HSBC,HSBC Holdings plc,44.72,0.35,+0.79%,4:00 PM EDT,671.21k,1.45M,162.65B
29,NWG,NatWest Group plc,9.52,-0.02,-0.21%,4:00 PM EDT,1.36M,2.49M,39.63B
1,BAC,Bank of America Corporation,42.32,-0.28,-0.66%,4:00 PM EDT,25.72M,39.08M,325.39B
19,NU,Nu Holdings Ltd.,14.45,0.29,+2.05%,4:00 PM EDT,14.01M,40.75M,69.21B
13,TD,The Toronto-Dominion Bank,56.35,-0.07,-0.12%,4:00 PM EDT,3.11M,2.51M,98.61B
24,BBVA,"Banco Bilbao Vizcaya Argentaria, S.A.",10.08,0.21,+2.13%,4:00 PM EDT,818.94k,1.47M,58.09B
25,ITUB,Itaú Unibanco Holding S.A.,6.18,-0.03,-0.48%,4:00 PM EDT,11.89M,19.27M,56.52B
27,ING,ING Groep N.V.,17.38,0.11,+0.64%,4:00 PM EDT,835.28k,1.72M,54.69B


In [380]:
host = os.getenv("DB_HOST", "localhost")         
database = os.getenv("POSTGRES_DB2", "landing_zone")      
user = os.getenv("POSTGRES_USER", "user")        
password = os.getenv("POSTGRES_PASSWORD", "pass") 
port = os.getenv("POSTGRES_PORT", "5432")        


engine = create_engine(f'postgresql://{user}:{password}@{host}:{port}/{database}')
engine

Engine(postgresql://sib_user:***@localhost:5432/landing_zone)

In [367]:
df['Last Price'] = pd.to_numeric(df['Last Price'])

df.to_sql('banks_stocks', con=engine, if_exists='replace', index=False)

30

### 2. Buscar la informacion en yfinance de estos bancos dado el symbol en la tabla banks_stocks

In [381]:
query = 'SELECT * FROM banks_stocks'
result = pd.read_sql(query, engine)
symbols_list = result['Symbol'].unique().tolist()
symbols_list[0:5]

['JPM', 'BAC', 'WFC', 'MS', 'RY']

In [382]:
def get_basic_info(ticker):
    
    info = ticker.info
    return {
        'symbol': ticker.ticker,
        'company_name': info.get('longName'),
        'industry': info.get('industry'),
        'sector': info.get('sector'),
        'employee_count': info.get('fullTimeEmployees'),
        'city': info.get('city'),
        'phone': info.get('phone'),
        'state': info.get('state'),
        'country': info.get('country'),
        'website': info.get('website'),
        'address': info.get('address1'),
        'created_at': datetime.now(),
        'updated_at': datetime.now()
    }

In [383]:
def get_price_stock(ticker, interval='1d'):
    
    historical_data = ticker.history(period=interval)
    historical_data.reset_index(inplace=True)
    historical_data['symbol'] = ticker.ticker
    historical_data['created_at'] = datetime.now()
    historical_data['updated_at'] = datetime.now()

    return historical_data

In [384]:
def get_fundamental_stock(ticker):
    info = ticker.info
    total_assets = ticker.balance_sheet.loc['Total Assets'].iloc[0] if 'Total Assets' in ticker.balance_sheet.index else None
    invested_capital = ticker.balance_sheet.loc['Invested Capital'].iloc[0] if 'Invested Capital' in ticker.balance_sheet.index else None
    return {
        'symbol': ticker.ticker,
        'assets': total_assets,
        'debt': info.get('totalDebt'),
        'invested_capital': invested_capital,
        'shares_issued': info.get('sharesOutstanding'),
        'created_at': datetime.now(),
        'updated_at': datetime.now()
    }

In [385]:
def get_shares_holders_info(ticker):
    holders = ticker.institutional_holders[['Date Reported', 'Holder', 'Shares', 'Value']]
    
    holders['symbol'] = ticker.ticker
    holders['created_at'] = datetime.now()
    holders['updated_at'] = datetime.now()

    return holders

In [386]:
def get_calificadores(ticker,  start_year=2023, end_year=2024):
    data = ticker.upgrades_downgrades
    
    data.reset_index(inplace=True)

    data['GradeDate'] = pd.to_datetime(data['GradeDate'])
    data_filtered = data[
        (data['GradeDate'].dt.year >= start_year) &
        (data['GradeDate'].dt.year <= end_year)
    ].copy()
    
    data_filtered['symbol'] = ticker.ticker
    data_filtered['created_at'] = datetime.now()
    data_filtered['updated_at'] = datetime.now()
    
    return data_filtered

In [387]:
basic_info_list = []
price_dfs = []
fundamentals_dfs = []
holders_dfs = []
calificadores_dfs = []

for symbol in symbols_list:
    stock = yf.Ticker(symbol)
    
    print('A buscar informacion de: '+symbol)

    # Obtener informacion básica del stock
    basic_info_list.append( get_basic_info(stock) )
    
    # Obtener Precios Diarios en Bolsa
    price_dfs.append( get_price_stock(stock) )

    # Obtener Información Fundamental del stock
    fundamentals_dfs.append( get_fundamental_stock(stock) )

    # Obtener Información sobre los Holders
    holders_dfs.append( get_shares_holders_info(stock) )

    # Obtener los calificadores
    calificadores_dfs.append( get_calificadores(stock) )


A buscar informacion de: JPM
A buscar informacion de: BAC
A buscar informacion de: WFC
A buscar informacion de: MS
A buscar informacion de: RY
A buscar informacion de: GS
A buscar informacion de: HSBC
A buscar informacion de: HDB
A buscar informacion de: SCHW
A buscar informacion de: MUFG
A buscar informacion de: C
A buscar informacion de: IBN
A buscar informacion de: UBS
A buscar informacion de: TD
A buscar informacion de: SMFG
A buscar informacion de: SAN
A buscar informacion de: USB
A buscar informacion de: PNC
A buscar informacion de: UNCRY
A buscar informacion de: NU
A buscar informacion de: BMO
A buscar informacion de: IBKR
A buscar informacion de: TFC
A buscar informacion de: BBVA
A buscar informacion de: ITUB
A buscar informacion de: BK
A buscar informacion de: ING
A buscar informacion de: BCS
A buscar informacion de: NWG


In [388]:
# Convertir a DataFrame para la información básica
basic_info_df = pd.DataFrame(basic_info_list)

# Convertir a DataFrame para la información de Fundamental
fundamentals_df = pd.DataFrame(fundamentals_dfs)

# Concatenar todos los DataFrames de precios en uno solo
all_price_data = pd.concat(price_dfs, ignore_index=True)

# Concatenar todos los DataFrames de holders en uno solo
all_holders_data = pd.concat(holders_dfs, ignore_index=True)

# Convertir a DataFrame para la información de los calificadores
calificadores_df = pd.concat(calificadores_dfs, ignore_index=True)

In [376]:
calificadores_df

Unnamed: 0,GradeDate,Firm,ToGrade,FromGrade,Action,symbol,creation_dt
0,2024-10-14 15:34:37,Oppenheimer,Outperform,Outperform,main,JPM,2024-10-19 13:19:23.973359
1,2024-10-14 14:22:21,Barclays,Overweight,Overweight,main,JPM,2024-10-19 13:19:23.973359
2,2024-10-14 13:48:16,RBC Capital,Outperform,Outperform,main,JPM,2024-10-19 13:19:23.973359
3,2024-10-14 12:59:32,Evercore ISI Group,Outperform,Outperform,main,JPM,2024-10-19 13:19:23.973359
4,2024-10-02 13:55:16,Oppenheimer,Outperform,Outperform,main,JPM,2024-10-19 13:19:23.973359
...,...,...,...,...,...,...,...
1095,2023-01-03 12:23:16,Barclays,Overweight,,main,BK,2024-10-19 13:19:40.875645
1096,2024-06-07 13:28:13,Barclays,Overweight,Equal-Weight,up,ING,2024-10-19 13:19:41.727203
1097,2023-10-26 10:07:12,B of A Securities,Underperform,Neutral,down,BCS,2024-10-19 13:19:42.319238
1098,2023-09-26 09:02:23,Morgan Stanley,Overweight,Equal-Weight,up,BCS,2024-10-19 13:19:42.319238


In [389]:
# Cargar la información básica
basic_info_df.to_sql('bank_basic_info', con=engine, if_exists='append', index=False)

# Cargar los datos de precios
all_price_data.to_sql('daily_stock_prices', con=engine, if_exists='append', index=False)

# Cargar la información de fundamentales
fundamentals_df.to_sql('bank_fundamentals', con=engine, if_exists='append', index=False)

# Cargar la información de holders
all_holders_data.to_sql('stock_holders', con=engine, if_exists='append', index=False)

# Cargar la información de los calificadores
calificadores_df.to_sql('stock_calificadores', con=engine, if_exists='append', index=False)

100