In [11]:
import pandas as pd
import numpy as np

import yfinance as yf

import time
from tqdm import tqdm

# 브라우저(TLS) 환경을 흉내내는 세션 생성
from curl_cffi import requests
session = requests.Session(impersonate="chrome")

# S&P500 종목 티커, 섹터/산업 정보

In [12]:
# 위키피디아 S&P500 종목 리스트
url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
tables = pd.read_html(url)

# 첫 번째 테이블이 S&P500 종목
sp500_table = tables[0]

# 필요한 칼럼만 추출 (Symbol, Security, GICS Sector, GICS Sub-Industry)
df = sp500_table[['Symbol', 'Security', 'GICS Sector', 'GICS Sub-Industry']]

# 칼럼명 변경
df_sp500 = df.rename(columns={
    "Symbol": "티커",
    "Security": "종목명",
    "GICS Sector": "GICS섹터",
    "GICS Sub-Industry": "GICS세부산업군"
})

df_sp500.to_csv(f"/Users/leesangwon/Documents/ThemeStock_file/US_data/sp500_sector.csv", index=False)
df_sp500.head()

Unnamed: 0,티커,종목명,GICS섹터,GICS세부산업군
0,MMM,3M,Industrials,Industrial Conglomerates
1,AOS,A. O. Smith,Industrials,Building Products
2,ABT,Abbott Laboratories,Health Care,Health Care Equipment
3,ABBV,AbbVie,Health Care,Biotechnology
4,ACN,Accenture,Information Technology,IT Consulting & Other Services


# all info 로드

In [13]:
def make_summary_df(data: dict):
    # dict에서 필요한 값 뽑기
    row = {
        "티커": data.get("symbol"),
        "종목명": data.get("longName") or data.get("shortName"),
        "시장구분": data.get("fullExchangeName"),
        "웹사이트": data.get("website"),
        "증권구분": data.get("quoteType"),
        "yf섹터": data.get("sector"),

        "종가": data.get("regularMarketPrice"),
        "시가": data.get("regularMarketOpen") or data.get("open"),
        "고가": data.get("regularMarketDayHigh") or data.get("dayHigh"),
        "저가": data.get("regularMarketDayLow") or data.get("dayLow"),
        "52주최고": data.get("fiftyTwoWeekHigh"),
        "52주최저": data.get("fiftyTwoWeekLow"),
        "PER": data.get("trailingPE"),
        "PBR": data.get("priceToBook"),
        "베타": data.get("beta"),
        "배당성향": data.get("payoutRatio"),

        # "자산": data.get("totalAssets"),
        # "부채": data.get("totalLiab") or data.get("totalDebt"),
        # "자본": data.get("totalStockholderEquity"),
        # "매출액": data.get("totalRevenue"),
        # "영업이익": data.get("operatingIncome"),
        # "당기순이익": data.get("netIncomeToCommon"),

        "거래량": data.get("regularMarketVolume") or data.get("volume"),
        "거래대금": (data.get("regularMarketPrice") or 0) * (data.get("regularMarketVolume") or 0),
        "시가총액": data.get("marketCap")
    }

    return pd.DataFrame([row])


tickers = df_sp500["티커"].tolist()
results = []
for t in tqdm(tickers, desc="S&P500 일반정보 로드 중"):
    try:
        ticker = yf.Ticker(t, session=session)
        data = ticker.get_info()   # dict
        df = make_summary_df(data) # DataFrame 한 줄
        results.append(df)
    except Exception as e:
        print(f"{t} 불러오기 실패: {e}")
    time.sleep(0.1)

# 모든 종목 결과 합치기
final_df = pd.concat(
    [df for df in results if not df.empty and not df.isna().all().all()], 
    ignore_index=True
)

final_df.to_csv(f"/Users/leesangwon/Documents/ThemeStock_file/US_data/sp500_all_info.csv", index=False)
final_df.head()

S&P500 일반정보 로드 중: 100%|██████████| 503/503 [05:57<00:00,  1.41it/s]
  final_df = pd.concat(


Unnamed: 0,티커,종목명,시장구분,웹사이트,증권구분,yf섹터,종가,시가,고가,저가,52주최고,52주최저,PER,PBR,베타,배당성향,거래량,거래대금,시가총액
0,MMM,3M Company,NYSE,https://www.3m.com,EQUITY,Industrials,154.34,152.84,155.72,152.05,164.15,121.98,21.436111,19.163149,1.104,0.3972,1703146,262863600.0,82206113792
1,AOS,A. O. Smith Corporation,NYSE,https://www.aosmith.com,EQUITY,Industrials,72.82,72.3,73.38,72.08,92.06,58.83,20.284122,5.527974,1.22,0.3733,764215,55650140.0,10204266496
2,ABT,Abbott Laboratories,NYSE,https://www.abbott.com,EQUITY,Healthcare,131.25,129.92,131.72,128.74,141.23,110.68,16.468006,4.517606,0.705,0.2861,3876258,508758900.0,228435378176
3,ABBV,AbbVie Inc.,NYSE,https://www.abbvie.com,EQUITY,Healthcare,206.19,206.53,207.795,205.1009,218.66,163.81,97.72038,-1982.5962,0.503,3.0381,3836548,791057800.0,364247023616
4,ACN,Accenture plc,NYSE,https://www.accenture.com,EQUITY,Technology,255.88,250.53,257.58,250.395,398.35,236.67,20.372612,5.215123,1.29,0.4558,4540074,1161714000.0,159375622144


# 재무 정보 로드

In [None]:
def make_simple_financial_df(ticker_symbol):
    ticker = yf.Ticker(ticker_symbol)

    # --- 재무상태표 (Balance Sheet) ---
    bs = ticker.balance_sheet
    bs_col = bs.columns[0] if not bs.empty else None

    # --- 손익계산서 (Income Statement) ---
    fs = ticker.financials
    fs_col = fs.columns[0] if not fs.empty else None

    row = {
        "티커": ticker.ticker,
        # 우선순위: BS의 첫 번째 컬럼(결산일) → 없으면 IS의 첫 번째 컬럼
        "결산일": str(bs_col) if bs_col is not None else str(fs_col),
        # --- 자산, 부채, 자본 ---
        "자산": bs.loc["Total Assets", bs_col] if "Total Assets" in bs.index else np.nan,
        "부채": bs.loc["Total Debt", bs_col] if "Total Debt" in bs.index else np.nan,
        "자본": bs.loc["Total Equity Gross Minority Interest", bs_col] if "Total Equity Gross Minority Interest" in bs.index else np.nan,
        # --- 매출액, 영업이익, 당기순이익 ---
        "매출액": fs.loc["Total Revenue", fs_col] if "Total Revenue" in fs.index else np.nan,
        "영업이익": fs.loc["Operating Income", fs_col] if "Operating Income" in fs.index else np.nan,
        "당기순이익": fs.loc["Net Income", fs_col] if "Net Income" in fs.index else np.nan,
    }

    return pd.DataFrame([row])


# 실행 예시
tickers = df_sp500["티커"].tolist()
results = []

for t in tqdm(tickers, desc="S&P500 통합 재무데이터 로드 중"):
    try:
        df_one = make_simple_financial_df(t)
        results.append(df_one)
    except Exception as e:
        print(f"{t} 처리 실패: {e}")
    time.sleep(0.1)  # API 호출 속도 제한 방지

df_final = pd.concat(results, ignore_index=True)

df_final.to_csv(f"/Users/leesangwon/Documents/ThemeStock_file/US_data/sp500_all_financial.csv", index=False)
df_final.head()

S&P500 통합 재무데이터 로드 중: 100%|██████████| 503/503 [05:29<00:00,  1.53it/s]


Unnamed: 0,티커,결산일,자산,부채,자본,매출액,영업이익,당기순이익
0,MMM,2024-12-31 00:00:00,3.986800e+10,1.365900e+10,3.894000e+09,2.457500e+10,4.822000e+09,4.173000e+09
1,AOS,2024-12-31 00:00:00,3.240000e+09,2.167000e+08,1.883500e+09,3.818100e+09,7.168000e+08,5.336000e+08
2,ABT,2024-12-31 00:00:00,8.141400e+10,1.502100e+10,4.790100e+10,4.195000e+10,6.825000e+09,1.340200e+10
3,ABBV,2024-12-31 00:00:00,1.351610e+11,6.714400e+10,3.364000e+09,5.633400e+10,1.189400e+10,4.278000e+09
4,ACN,2024-08-31 00:00:00,5.593236e+10,4.120549e+09,2.916825e+10,6.489646e+10,9.595847e+09,7.264787e+09
...,...,...,...,...,...,...,...,...
498,XYL,2024-12-31 00:00:00,1.649300e+10,2.125000e+09,1.088200e+10,8.562000e+09,1.071000e+09,8.900000e+08
499,YUM,2024-12-31 00:00:00,6.727000e+09,1.228600e+10,-7.648000e+09,7.549000e+09,2.403000e+09,1.486000e+09
500,ZBRA,2024-12-31 00:00:00,7.968000e+09,2.283000e+09,3.586000e+09,4.981000e+09,7.650000e+08,5.280000e+08
501,ZBH,2024-12-31 00:00:00,2.136530e+10,6.204600e+09,1.247620e+10,7.678600e+09,1.528300e+09,9.038000e+08


Unnamed: 0,티커,결산일,자산,부채,자본,매출액,영업이익,당기순이익
0,MMM,2024-12-31 00:00:00,39868000000.0,13659000000.0,3894000000.0,24575000000.0,4822000000.0,4173000000.0
1,AOS,2024-12-31 00:00:00,3240000000.0,216700000.0,1883500000.0,3818100000.0,716800000.0,533600000.0
2,ABT,2024-12-31 00:00:00,81414000000.0,15021000000.0,47901000000.0,41950000000.0,6825000000.0,13402000000.0
3,ABBV,2024-12-31 00:00:00,135161000000.0,67144000000.0,3364000000.0,56334000000.0,11894000000.0,4278000000.0
4,ACN,2024-08-31 00:00:00,55932360000.0,4120549000.0,29168250000.0,64896460000.0,9595847000.0,7264787000.0
