In [40]:
import os

import pandas as pd
import requests
import yfinance as yf
from bs4 import BeautifulSoup
from sqlalchemy import create_engine, MetaData, Table, Column, String, Float
from sqlalchemy.engine import URL
from sqlalchemy.exc import ProgrammingError
from sqlalchemy.orm import declarative_base, sessionmaker

### 1. Web scraping de los bancos que pertenecen a la bolsa de valores de Estados Unidos

In [28]:
# Fetch the Yahoo Finance watchlist page and locate the HTML table that holds
# the listed symbols.
urlBankStocks = 'https://finance.yahoo.com/u/yahoo-finance/watchlists/bank-and-financial-services-stocks/'
headersRequest = {
    'User-Agent': 'Safari'
}

# Without a timeout, requests waits indefinitely if the server never answers,
# which would hang the whole notebook run.
response = requests.get(urlBankStocks, headers=headersRequest, timeout=30)
response.raise_for_status()

soup = BeautifulSoup(response.content, 'html.parser')

# find() returns None when nothing matches; fail with a clear message instead
# of an AttributeError on the next line if the page layout has changed.
section = soup.find('section', {'data-test': 'cwl-symbols'})
if section is None:
    raise RuntimeError(
        "Could not find <section data-test='cwl-symbols'> in the response from "
        f"{urlBankStocks}; the page layout may have changed."
    )

table = section.find('table')
if table is None:
    raise RuntimeError(
        "The 'cwl-symbols' section does not contain a <table>; "
        "the page layout may have changed."
    )

In [79]:
# Column names come from the header cells. Strip surrounding whitespace (as is
# already done for the data cells) so the names match later column lookups.
headers = [th.get_text(strip=True) for th in table.find_all('th')]

# One list of cell texts per data row. The first <tr> is skipped (presumably
# the header row), and so is any row without <td> cells, which would otherwise
# become an all-null record.
rows = []
for tr in table.find_all('tr')[1:]:
    cells = tr.find_all('td')
    if not cells:
        continue
    rows.append([cell.get_text(strip=True) for cell in cells])

df = pd.DataFrame(rows, columns=headers)

# sample() raises ValueError when asked for more rows than the frame holds, so
# cap the preview size at the number of scraped rows.
df.sample(min(10, len(df)))

Unnamed: 0,Symbol,Company Name,Last Price,Change,% Change,Market Time,Volume,Avg Vol (3 month),Market Cap
23,TFC,Truist Financial Corporation,43.67,0.72,+1.68%,4:00 PM EDT,6.45M,7.88M,58.48B
6,HSBC,HSBC Holdings plc,44.72,0.35,+0.79%,4:00 PM EDT,671.21k,1.45M,162.65B
21,BNS,The Bank of Nova Scotia,53.54,0.03,+0.06%,4:00 PM EDT,803.96k,1.62M,66.47B
17,PNC,"The PNC Financial Services Group, Inc.",189.38,0.58,+0.31%,4:00 PM EDT,1.46M,1.74M,75.17B
28,BCS,Barclays PLC,12.84,-0.1,-0.77%,4:00 PM EDT,9.31M,13.89M,46.58B
15,SAN,"Banco Santander, S.A.",5.04,0.05,+1.00%,4:00 PM EDT,2.05M,2.40M,77.42B
12,UBS,UBS Group AG,32.83,0.27,+0.83%,4:00 PM EDT,1.02M,1.67M,105.13B
22,IBKR,"Interactive Brokers Group, Inc.",148.81,-0.62,-0.41%,4:00 PM EDT,766.55k,938.88k,62.92B
29,NWG,NatWest Group plc,9.52,-0.02,-0.21%,4:00 PM EDT,1.36M,2.48M,39.63B
20,BMO,Bank of Montreal,94.06,0.48,+0.51%,4:00 PM EDT,538.86k,1.08M,68.81B


In [75]:
# Connection settings are read from the environment; the second argument of
# each getenv call is the fallback used when the variable is not set.
host = os.getenv("DB_HOST", "localhost")
database = os.getenv("POSTGRES_DB", "mydb")
user = os.getenv("POSTGRES_USER", "user")
password = os.getenv("POSTGRES_PASSWORD", "pass")
port = os.getenv("POSTGRES_PORT", "5432")

# Build the URL from its components instead of an f-string so that special
# characters in the user name or password (such as '@', ':' or '/') are
# escaped correctly rather than producing a malformed connection string.
connection_url = URL.create(
    drivername='postgresql',
    username=user,
    password=password,
    host=host,
    port=int(port),
    database=database,
)

engine = create_engine(connection_url)
engine

Engine(postgresql://sib_user:***@localhost:5432/source)

In [85]:
# Scraped prices are text and may carry thousands separators ("1,234.56"),
# which pd.to_numeric rejects. Remove the commas first; skip that step when the
# column is already numeric so re-running this cell is safe.
last_price = df['Last Price']
if not pd.api.types.is_numeric_dtype(last_price):
    last_price = last_price.str.replace(',', '', regex=False)
df['Last Price'] = pd.to_numeric(last_price)

# if_exists='replace' drops and recreates banks_stocks on every run.
df.to_sql('banks_stocks', con=engine, if_exists='replace', index=False)

30

### 2. Buscar en yfinance la información de estos bancos a partir del símbolo (`Symbol`) de la tabla `banks_stocks`

In [89]:
# Read the banks_stocks table back from the database and preview ten random
# rows of the result.
query = 'SELECT * FROM banks_stocks'
result = pd.read_sql(sql=query, con=engine)
result.sample(n=10)

Unnamed: 0,Symbol,Company Name,Last Price,Change,% Change,Market Time,Volume,Avg Vol (3 month),Market Cap
28,BCS,Barclays PLC,12.84,-0.1,-0.77%,4:00 PM EDT,9.31M,13.89M,46.58B
10,C,Citigroup Inc.,62.85,-1.12,-1.75%,4:00 PM EDT,13.58M,13.06M,118.87B
18,UNCRY,UniCredit S.p.A.,22.11,0.09,+0.41%,3:55 PM EDT,15.62k,209.59k,69.21B
21,BNS,The Bank of Nova Scotia,53.54,0.03,+0.06%,4:00 PM EDT,803.96k,1.62M,66.47B
20,BMO,Bank of Montreal,94.06,0.48,+0.51%,4:00 PM EDT,538.86k,1.08M,68.81B
14,SMFG,"Sumitomo Mitsui Financial Group, Inc.",13.15,0.1,+0.77%,4:00 PM EDT,599.32k,1.54M,85.80B
2,WFC,Wells Fargo & Company,64.36,-0.02,-0.03%,4:00 PM EDT,19.21M,17.06M,219.07B
3,MS,Morgan Stanley,121.06,1.62,+1.36%,4:00 PM EDT,5.89M,6.02M,195.15B
15,SAN,"Banco Santander, S.A.",5.04,0.05,+1.00%,4:00 PM EDT,2.05M,2.40M,77.42B
26,BK,The Bank of New York Mellon Corporation,76.67,-1.08,-1.39%,4:00 PM EDT,4.83M,4.06M,55.75B
