In [5]:
# !pip install yfinance==0.2.65

import pandas as pd
import numpy as np

import yfinance as yf

import time
from tqdm import tqdm

# Create an HTTP session that impersonates a Chrome browser (TLS environment).
# The session is handed to yf.Ticker(...) by the summary-download cell below.
from curl_cffi import requests
session = requests.Session(impersonate="chrome")

# S&P500 종목 티커, 섹터/산업 정보

In [None]:
# Wikipedia's list of S&P 500 constituents
url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
tables = pd.read_html(url)

# The first table on the page is the constituents table
sp500_table = tables[0]

# Keep only the columns we need (Symbol, Security, GICS Sector, GICS Sub-Industry)
df = sp500_table[['Symbol', 'Security', 'GICS Sector', 'GICS Sub-Industry']]

# Rename the columns to their Korean labels, then normalise the tickers:
# Wikipedia writes share classes with a dot (e.g. "BRK.B") whereas Yahoo Finance
# expects a hyphen ("BRK-B"). Every later cell feeds this column to yfinance, so
# the dotted form would make those lookups fail.
df_sp500 = (
    df.rename(columns={
        "Symbol": "티커",
        "Security": "종목명",
        "GICS Sector": "GICS섹터",
        "GICS Sub-Industry": "GICS세부산업군"
    })
    .assign(**{"티커": lambda frame: frame["티커"].str.replace(".", "-", regex=False)})
)

# Persist the constituent list, then preview it as the cell output
df_sp500.to_csv("/Users/leesangwon/Documents/ThemeStock_file/US_data/sp500_sector.csv", index=False)
df_sp500.head()

Unnamed: 0,티커,종목명,GICS섹터,GICS세부산업군
0,MMM,3M,Industrials,Industrial Conglomerates
1,AOS,A. O. Smith,Industrials,Building Products
2,ABT,Abbott Laboratories,Health Care,Health Care Equipment
3,ABBV,AbbVie,Health Care,Biotechnology
4,ACN,Accenture,Information Technology,IT Consulting & Other Services


# 종목별 일반 정보(yfinance `get_info`) 로드

In [8]:
def make_summary_df(data: dict) -> pd.DataFrame:
    """Build a one-row summary DataFrame from a yfinance ``get_info()`` dict.

    Args:
        data: Mapping of quote fields (``symbol``, ``regularMarketPrice``, ...).
            Keys that are absent simply produce a missing value in the
            corresponding column.

    Returns:
        A single-row DataFrame with the Korean-labelled columns
        티커, 종목명, ..., 거래량, 거래대금, 시가총액.
    """
    # Resolve price and volume once so that 거래대금 (turnover) is derived from
    # exactly the values reported in the 종가 and 거래량 columns.
    close_price = data.get("regularMarketPrice")
    volume = data.get("regularMarketVolume") or data.get("volume")

    # Leave the turnover missing when either input is unavailable. Writing 0
    # here would fabricate a value and would also stop a row without any data
    # from being recognised as all-NaN by the caller.
    if close_price is None or volume is None:
        turnover = None
    else:
        turnover = close_price * volume

    row = {
        "티커": data.get("symbol"),
        "종목명": data.get("longName") or data.get("shortName"),
        "시장구분": data.get("fullExchangeName"),
        "웹사이트": data.get("website"),
        "증권구분": data.get("quoteType"),
        "yf섹터": data.get("sector"),

        "종가": close_price,
        "시가": data.get("regularMarketOpen") or data.get("open"),
        "고가": data.get("regularMarketDayHigh") or data.get("dayHigh"),
        "저가": data.get("regularMarketDayLow") or data.get("dayLow"),
        "52주최고": data.get("fiftyTwoWeekHigh"),
        "52주최저": data.get("fiftyTwoWeekLow"),
        "PER": data.get("trailingPE"),
        "PBR": data.get("priceToBook"),
        "베타": data.get("beta"),
        "배당성향": data.get("payoutRatio"),

        # Balance-sheet and income-statement figures (assets, liabilities,
        # revenue, ...) are intentionally not collected here.

        "거래량": volume,
        "거래대금": turnover,
        "시가총액": data.get("marketCap")
    }

    return pd.DataFrame([row])


# Download the summary row of every S&P 500 ticker, pausing one second after each
tickers = df_sp500["티커"].tolist()
results = []
for t in tqdm(tickers, desc="S&P500 일반정보 로드 중"):
    try:
        ticker = yf.Ticker(t, session=session)
        data = ticker.get_info()    # plain dict of quote fields
        df = make_summary_df(data)  # one-row DataFrame
    except Exception as e:
        # Best effort: report the failing ticker and carry on with the rest
        print(f"{t} 불러오기 실패: {e}")
    else:
        results.append(df)
    time.sleep(1)

# Keep only frames that hold at least one non-missing value, then stack them
usable_frames = []
for frame in results:
    if frame.empty or frame.isna().all().all():
        continue
    usable_frames.append(frame)

final_df = pd.concat(usable_frames, ignore_index=True)

# Persist the combined table, then preview it as the cell output
final_df.to_csv("/Users/leesangwon/Documents/ThemeStock_file/US_data/sp500_all_info_23.csv", index=False)
final_df.head()

S&P500 일반정보 로드 중: 100%|██████████| 503/503 [12:01<00:00,  1.44s/it]
  final_df = pd.concat(


Unnamed: 0,티커,종목명,시장구분,웹사이트,증권구분,yf섹터,종가,시가,고가,저가,52주최고,52주최저,PER,PBR,베타,배당성향,거래량,거래대금,시가총액
0,MMM,3M Company,NYSE,https://www.3m.com,EQUITY,Industrials,158.72,155.75,159.11,155.045,164.15,121.98,22.01387,19.706978,1.104,0.3972,1936938,307430800.0,84539039744
1,AOS,A. O. Smith Corporation,NYSE,https://www.aosmith.com,EQUITY,Industrials,73.61,72.13,73.845,71.91,92.06,58.83,20.504179,5.587945,1.22,0.3733,674080,49619030.0,10314969088
2,ABT,Abbott Laboratories,NYSE,https://www.abbott.com,EQUITY,Healthcare,132.59,132.77,133.62,131.34,141.23,110.86,16.636135,4.563728,0.705,0.2861,4130146,547616100.0,230767591424
3,ABBV,AbbVie Inc.,NYSE,https://www.abbvie.com,EQUITY,Healthcare,210.6,210.84,211.47,207.12,218.66,163.81,99.81043,-2025.0,0.503,3.0381,4817905,1014651000.0,372037550080
4,ACN,Accenture plc,NYSE,https://www.accenture.com,EQUITY,Technology,259.22,254.69,262.165,254.4701,398.35,236.67,20.622116,5.283196,1.29,0.4558,3102369,804196100.0,161455947776


# 재무 정보 로드

In [None]:
def _latest_statement_value(statement, column, *row_names):
    """Return the first non-missing value among ``row_names`` in ``column``, else NaN."""
    if column is None:
        return np.nan
    for row_name in row_names:
        if row_name in statement.index:
            value = statement.loc[row_name, column]
            if pd.notna(value):
                return value
    return np.nan


def make_simple_financial_df(ticker_symbol):
    """Build a one-row DataFrame of headline financials for one ticker.

    Values are read from the first column of the yfinance balance sheet and
    income statement (presumably the most recent fiscal period - confirm
    against the yfinance column ordering).

    Args:
        ticker_symbol: Symbol passed to ``yf.Ticker``.

    Returns:
        A single-row DataFrame with columns 티커, 결산일, 자산, 부채, 자본,
        매출액, 영업이익, 당기순이익. Items that are unavailable are NaN.
    """
    ticker = yf.Ticker(ticker_symbol)

    # --- Balance sheet ---
    bs = ticker.balance_sheet
    bs_col = bs.columns[0] if not bs.empty else None

    # --- Income statement ---
    fs = ticker.financials
    fs_col = fs.columns[0] if not fs.empty else None

    # Period end: balance-sheet column first, income-statement column otherwise.
    # Stays NaN (not the string "None") when neither statement is available.
    period_end = bs_col if bs_col is not None else fs_col

    row = {
        "티커": ticker.ticker,
        "결산일": str(period_end) if period_end is not None else np.nan,
        # --- Assets, liabilities, equity ---
        "자산": _latest_statement_value(bs, bs_col, "Total Assets"),
        # 부채 means total liabilities; "Total Debt" covers borrowings only, so
        # it is used solely as a fallback when the liabilities row is missing.
        "부채": _latest_statement_value(
            bs, bs_col, "Total Liabilities Net Minority Interest", "Total Debt"
        ),
        "자본": _latest_statement_value(bs, bs_col, "Total Equity Gross Minority Interest"),
        # --- Revenue, operating income, net income ---
        "매출액": _latest_statement_value(fs, fs_col, "Total Revenue"),
        "영업이익": _latest_statement_value(fs, fs_col, "Operating Income"),
        "당기순이익": _latest_statement_value(fs, fs_col, "Net Income"),
    }

    return pd.DataFrame([row])


# Example run: collect the one-row financial summary of every S&P 500 ticker
tickers = df_sp500["티커"].tolist()
results = []

for t in tqdm(tickers, desc="S&P500 통합 재무데이터 로드 중"):
    try:
        df_one = make_simple_financial_df(t)
    except Exception as e:
        # Best effort: report the failing ticker and carry on with the rest
        print(f"{t} 처리 실패: {e}")
    else:
        results.append(df_one)
    time.sleep(0.1)  # short pause between tickers to avoid API rate limiting

# Stack the per-ticker rows into a single table
df_final = pd.concat(results, ignore_index=True)

# Persist the combined table, then preview it as the cell output
df_final.to_csv("/Users/leesangwon/Documents/ThemeStock_file/US_data/sp500_all_financial.csv", index=False)
df_final.head()

In [None]:
import yfinance as yf
import pandas as pd

def _statement_to_long(statement, ticker_symbol):
    """Transpose one statement to a row per period and tag it with the ticker."""
    if statement.empty:
        return pd.DataFrame()
    # Name the period axis explicitly before resetting it, so the column is
    # called "Date" whether or not the axis already carried a name.
    long_df = statement.transpose().rename_axis("Date").reset_index()
    long_df["Ticker"] = ticker_symbol
    return long_df


def get_financial_statements(ticker_symbol):
    """Fetch the balance sheet and income statement of one ticker.

    Args:
        ticker_symbol: Symbol passed to ``yf.Ticker``.

    Returns:
        A ``(bs_df, fs_df)`` tuple. Each frame has one row per statement
        column (reporting period) with a leading "Date" column, one column per
        line item and a trailing "Ticker" column. A statement that comes back
        empty yields an empty DataFrame.
    """
    ticker = yf.Ticker(ticker_symbol)

    # --- Balance sheet ---
    bs_df = _statement_to_long(ticker.balance_sheet, ticker_symbol)

    # --- Income statement ---
    fs_df = _statement_to_long(ticker.financials, ticker_symbol)

    return bs_df, fs_df

# Usage example: fetch both statements for a single ticker
bs_df, fs_df = get_financial_statements("AAPL")

# Balance-sheet preview, currently disabled
# print("📊 재무상태표")
# print(bs_df.head(), "\n")

# Print the income-statement banner, then show the frame as the cell output
print("📊 손익계산서")
fs_df

In [None]:
# List the column labels of the income-statement frame
fs_df.columns

In [None]:
# Korean display labels for income-statement line items, keyed by their English
# names. "결산일" and "티커" map to themselves.
# NOTE(review): presumably applied as a column rename on the income-statement
# frame - the usage is not shown here, confirm against the caller.
col_map = {
    "결산일": "결산일",
    "Tax Effect Of Unusual Items": "특이항목 세금효과",
    "Tax Rate For Calcs": "계산용 세율",
    "Normalized EBITDA": "정상화 EBITDA",
    # "Net Minority Interest" means net of (excluding) the minority interest,
    # so the label says 제외; the "Including Noncontrolling Interests" item
    # further down is the one that carries 포함.
    "Net Income From Continuing Operation Net Minority Interest": "계속영업 순이익(비지배지분 제외)",
    "Reconciled Depreciation": "조정 감가상각비",
    "Reconciled Cost Of Revenue": "조정 매출원가",
    "EBITDA": "EBITDA",
    "EBIT": "EBIT",
    "Net Interest Income": "순이자이익",
    "Interest Expense": "이자비용",
    "Interest Income": "이자수익",
    "Normalized Income": "정상화 순이익",
    "Net Income From Continuing And Discontinued Operation": "계속/중단영업 순이익",
    "Total Expenses": "총비용",
    "Total Operating Income As Reported": "보고된 영업이익",
    "Diluted Average Shares": "희석주식수(평균)",
    "Basic Average Shares": "기본주식수(평균)",
    "Diluted EPS": "희석주당순이익",
    "Basic EPS": "기본주당순이익",
    "Diluted NI Availto Com Stockholders": "희석 NI(보통주주 귀속)",
    "Net Income Common Stockholders": "순이익(보통주주 귀속)",
    "Net Income": "당기순이익",
    "Net Income Including Noncontrolling Interests": "당기순이익(비지배지분 포함)",
    "Net Income Continuous Operations": "계속영업 순이익",
    "Tax Provision": "법인세 비용",
    "Pretax Income": "법인세차감전이익",
    "Other Income Expense": "기타손익",
    "Other Non Operating Income Expenses": "기타영업외손익",
    "Net Non Operating Interest Income Expense": "영업외 순이자손익",
    "Interest Expense Non Operating": "영업외 이자비용",
    "Interest Income Non Operating": "영업외 이자수익",
    "Operating Income": "영업이익",
    "Operating Expense": "영업비용",
    "Research And Development": "연구개발비",
    "Selling General And Administration": "판매관리비",
    "Gross Profit": "매출총이익",
    "Cost Of Revenue": "매출원가",
    "Total Revenue": "매출액",
    "Operating Revenue": "영업수익",
    "티커": "티커"
}