<a href="https://colab.research.google.com/github/jatoogunhyo/sec_edgar/blob/main/US_listed_comp.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 필요한 환경설정

In [None]:
# Install and import FinanceDataReader (used below to fetch the stock listings)
!pip install finance-datareader
import FinanceDataReader as fdr

# Install and import yfinance (used below to fetch per-ticker info)
!pip install yfinance
import yfinance as yf

import pandas as pd

import warnings

# Suppress pandas' SettingWithCopyWarning
warnings.filterwarnings('ignore', category=pd.errors.SettingWithCopyWarning)

import time
from tqdm.notebook import tqdm



# FinanceDataReader를 사용하여 상장종목 목록 전체를 불러오기

❗ FinanceDataReader에서 제공하는 티커 중에는 B 클래스 주식처럼 하이픈(-)이 빠져 있는 경우가 있으므로, 해당 티커는 수동으로 수정해야 합니다.

✅ 현재까지 확인된 종목
* BRKB → BRK-B : S&P500, NYSE
* BFB → BF-B : S&P500, NYSE

## S&P 500

In [None]:
# Download the full list of S&P 500 constituents
df_spx = fdr.StockListing('S&P500')

# Correct the tickers known to be listed without their hyphen
# (applied to the Symbol column only)
df_spx['Symbol'] = df_spx['Symbol'].replace(
    to_replace={'BRKB': 'BRK-B', 'BFB': 'BF-B'},
)

# Check: number of rows, then a preview of the first rows
print(f'S&P500 : {len(df_spx)}')
df_spx.head()

S&P500 : 503


Unnamed: 0,Symbol,Name,Sector,Industry
0,MMM,3M,Industrials,Industrial Conglomerates
1,AOS,A. O. Smith,Industrials,Building Products
2,ABT,Abbott Laboratories,Health Care,Health Care Equipment
3,ABBV,AbbVie,Health Care,Biotechnology
4,ACN,Accenture,Information Technology,IT Consulting & Other Services


## NASDAQ

In [None]:
# Download the full list of NASDAQ-listed symbols
df_nasdaq = fdr.StockListing('NASDAQ')

# Check: number of rows, then a preview of the first rows
print(f'\nNASDAQ : {len(df_nasdaq)}')
df_nasdaq.head()

100%|██████████| 3639/3639 [00:24<00:00, 149.38it/s]


NASDAQ : 3639





Unnamed: 0,Symbol,Name,IndustryCode,Industry
0,AAPL,Apple Inc,57106020,전화 및 소형 장치
1,NVDA,NVIDIA Corp,57101010,반도체
2,MSFT,Microsoft Corp,57201020,소프트웨어
3,AMZN,Amazon.com Inc,53402010,백화점
4,META,Meta Platforms Inc,57201030,온라인 서비스


## NYSE

In [None]:
# Download the full list of NYSE-listed symbols
df_nyse = fdr.StockListing('NYSE')

# Correct the tickers known to be listed without their hyphen
# (applied to the Symbol column only)
df_nyse['Symbol'] = df_nyse['Symbol'].replace(
    to_replace={'BRKB': 'BRK-B', 'BFB': 'BF-B'},
)

# Check: number of rows, then a preview of the first rows
print(f'\nNYSE : {len(df_nyse)}')
df_nyse.head()

100%|██████████| 2739/2739 [00:18<00:00, 148.70it/s]


NYSE : 2739





Unnamed: 0,Symbol,Name,IndustryCode,Industry
0,TSM,Taiwan Semiconductor Manufacturing Co Ltd ADR,57101010,반도체
1,LLY,Eli Lilly and Co,56201040,제약
2,WMT,Walmart Inc,54301020,식품 소매 및 유통
3,JPM,JPMorgan Chase & Co,55101010,은행
4,V,Visa Inc,57201030,온라인 서비스


## AMEX

In [None]:
# Download the full list of AMEX-listed symbols
df_amex = fdr.StockListing('AMEX')

# Check: number of rows, then a preview of the first rows
print(f'\nAMEX : {len(df_amex)}')
df_amex.head()

100%|██████████| 315/315 [00:02<00:00, 116.26it/s]


AMEX : 315





Unnamed: 0,Symbol,Name,IndustryCode,Industry
0,IMO,Imperial Oil Ltd,50102030,"오일, 가스 정제 및 마케팅"
1,CBOE,Cboe Global Markets Inc,55102050,"금융, 상품 시장 운영 및 서비스 제공"
2,PHYS,Sprott Physical Gold Trust,55501030,폐쇄형 펀드
3,PSLV,Sprott Physical Silver Trust USD,55501030,폐쇄형 펀드
4,CEF,Sprott Physical Gold and Silver Trust USD,55501030,폐쇄형 펀드


# yfinance를 이용하여 티커별 정보 불러오기
💰 Yahoo Finance에서 제공하는 데이터별 출처

| Data                                             | Provider                                |
|--------------------------------------------------|-----------------------------------------|
| US IPO data                                      | NYSE and NASDAQ                         |
| Upgrades and downgrades                          | Benzinga                                |
| Sustainability data                              | Sustainalytics and Morningstar          |
| Company profile data                             | S&P Global Market Intelligence          |
| Top institutional and mutual fund holders        | Vickers-stock.com                       |
| International historical chart data and updates | Morningstar                             |
| Corporate governance scores                      | Institutional Shareholder Services      |
| US equities and global index historical data    | Commodity Systems, Inc.                 |
| Financial statements, valuation ratios, market cap, and shares outstanding data | Morningstar    |
| Analyst estimates, earnings, corporate and economic events, non-US IPO, and insider transactions data | LSEG Data and Analytics |


In [None]:
# Names of the Yahoo Finance info fields to attach to df_spx
columns_to_add = [
    'country',
    'marketCap',
    'exchange',
    'sectorKey',
    'industryKey',
    'dividendYield',
    'numberOfAnalystOpinions',
    'priceToBook',
    'forwardPE',
]

# Create one None-filled column on df_spx for every field listed above
for col in columns_to_add:
    df_spx[col] = None

In [None]:
# S&P 500
# For every symbol in df_spx, download its Yahoo Finance info dictionary and
# copy the fields named in columns_to_add into the matching row(s) of df_spx.
# Failures are reported and skipped so one bad ticker does not stop the run.
for symbol in tqdm(df_spx['Symbol'], desc="Fetching data from Yahoo Finance"):
    try:
        # Fetch the info dictionary for this ticker from Yahoo Finance
        info = yf.Ticker(symbol).get_info()
        # Build the row selector once per symbol rather than once per column
        row_mask = df_spx['Symbol'] == symbol
        # Copy each requested field; a field missing from the response is stored as None
        for col in columns_to_add:
            df_spx.loc[row_mask, col] = info.get(col, None)
    except Exception as e:
        # Best-effort: print the error and continue with the next symbol
        print(f"Error fetching data for {symbol}: {e}")
    finally:
        # Pause between requests on both success and failure, so a run of
        # failing requests is throttled the same way as successful ones
        time.sleep(0.1)

Fetching data from Yahoo Finance:   0%|          | 0/503 [00:00<?, ?it/s]

In [None]:
# Display df_spx ordered by marketCap, largest first
df_spx.sort_values('marketCap', ascending=False)

Unnamed: 0,Symbol,Name,Sector,Industry,country,marketCap,exchange,sectorKey,industryKey,dividendYield,numberOfAnalystOpinions,priceToBook,forwardPE
39,AAPL,Apple Inc.,Information Technology,"Technology Hardware, Storage & Peripherals",United States,3668604616704,NMS,technology,consumer-electronics,0.0041,42,64.427925,29.224627
348,NVDA,Nvidia,Information Technology,Semiconductors,United States,3431294042112,NMS,technology,semiconductors,0.0003,53,80.3383,31.473722
318,MSFT,Microsoft,Information Technology,Systems Software,United States,3156552712192,NMS,technology,software-infrastructure,0.0078,50,10.972527,28.282543
20,GOOG,Alphabet Inc. (Class C),Communication Services,Interactive Media & Services,United States,2382116683776,NMS,communication-services,internet-content-information,0.0041,17,7.628548,21.791088
19,GOOGL,Alphabet Inc. (Class A),Communication Services,Interactive Media & Services,United States,2382113275904,NMS,communication-services,internet-content-information,0.0041,47,7.572326,21.63049
...,...,...,...,...,...,...,...,...,...,...,...,...,...
242,HII,Huntington Ingalls Industries,Industrials,Aerospace & Defense,United States,7421263360,NYQ,industrials,aerospace-defense,0.0285,12,1.765692,12.124064
93,CE,Celanese,Materials,Specialty Chemicals,United States,7098721792,NYQ,basic-materials,chemicals,0.0424,18,0.9755,7.181912
69,BWA,BorgWarner,Consumer Discretionary,Automotive Parts & Equipment,United States,6897798144,NYQ,consumer-cyclical,auto-parts,0.014,17,1.120506,6.864521
81,CZR,Caesars Entertainment,Consumer Discretionary,Casinos & Gaming,United States,6795110400,NMS,consumer-cyclical,resorts-casinos,,17,1.622691,25.73346


In [None]:
# Display the rows of df_spx whose 'exchange' value is missing
df_spx.loc[df_spx['exchange'].isna()]

Unnamed: 0,Symbol,Name,Sector,Industry,country,marketCap,exchange,sectorKey,industryKey,dividendYield,numberOfAnalystOpinions,priceToBook,forwardPE
