# 전 세계 주식 데이터 수집하기

## 유료 데이터 벤더 이용하기

[https://api.tiingo.com/](https://api.tiingo.com)

In [1]:
import keyring

In [2]:
# download data
from tiingo import TiingoClient
import pandas as pd
import keyring

api_key = keyring.get_password('tiingo', 'ahn283')
config = {}
config['session'] = True
config['api_key'] = api_key
client = TiingoClient(config)

In [3]:
# list_stock_tickers() 메서드를 통해 티커 정보를 받아올 수 있다.
tickers = client.list_stock_tickers()
tickers_df = pd.DataFrame.from_records(tickers)

tickers_df.head()

Unnamed: 0,ticker,exchange,assetType,priceCurrency,startDate,endDate
0,-P-H,NYSE,Stock,USD,,
1,-P-S,NYSE,Stock,USD,2018-08-22,2023-05-05
2,000001,SHE,Stock,CNY,2007-01-04,2024-03-01
3,000002,SHE,Stock,CNY,2007-01-04,2024-03-01
4,000003,SHE,Stock,CNY,,


In [4]:
# 거래시장에 따른 데이터 구분 (마이너 거래소나 장외 거래소 이용 불가)
tickers_df.groupby(['exchange', 'priceCurrency'])['ticker'].count()

exchange   priceCurrency
           USD               2459
AMEX       USD                 80
ASX        AUD                169
           USD               2171
BATS       USD                 22
CSE        USD                 32
EXPM       USD               2045
LSE        USD                 12
NASDAQ     USD              12678
NMFQS      USD                 36
NYSE       USD               7759
NYSE ARCA  USD                 66
NYSE MKT   USD                465
NYSE NAT   USD                  3
OTCBB      USD                650
OTCCE      USD               1101
OTCGREY    USD               4147
OTCMKTS    USD               1189
OTCQB      USD               1254
OTCQX      USD                760
PINK       USD              15358
SHE        CNY               3387
           HKD                 12
SHEB       HKD                 42
SHG        CNY               2954
           USD                  6
SHGB       USD                 44
Name: ticker, dtype: int64

In [5]:
# APPL 주식 상세 정보 확인
ticker_metadata = client.get_ticker_metadata("AAPL")
print(ticker_metadata)

{'ticker': 'AAPL', 'name': 'Apple Inc', 'description': "Apple Inc. (Apple) designs, manufactures and markets mobile communication and media devices, personal computers, and portable digital music players, and a variety of related software, services, peripherals, networking solutions, and third-party digital content and applications. The Company's products and services include iPhone, iPad, Mac, iPod, Apple TV, a portfolio of consumer and professional software applications, the iOS and OS X operating systems, iCloud, and a variety of accessory, service and support offerings. The Company also delivers digital content and applications through the iTunes Store, App StoreSM, iBookstoreSM, and Mac App Store. The Company distributes its products worldwide through its retail stores, online stores, and direct sales force, as well as through third-party cellular network carriers, wholesalers, retailers, and value-added resellers. In February 2012, the Company acquired app-search engine Chomp.", 

In [6]:
# 주가 정보
# divCash : 현금배당, splitFactor : 주식분할 조정계수
historical_prices = client.get_dataframe("AAPL", 
                                         startDate='2017-08-01',
                                         frequency='daily')
historical_prices.head()

Unnamed: 0_level_0,close,high,low,open,volume,adjClose,adjHigh,adjLow,adjOpen,adjVolume,divCash,splitFactor
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2017-08-01 00:00:00+00:00,150.05,150.22,148.41,149.1,24725526,35.156718,35.196549,34.772466,34.934133,98902104,0.0,1.0
2017-08-02 00:00:00+00:00,157.14,159.75,156.16,159.28,69222793,36.817905,37.429428,36.588291,37.319307,276891172,0.0,1.0
2017-08-03 00:00:00+00:00,155.57,157.21,155.02,157.05,26000738,36.450054,36.834306,36.321189,36.796818,104002952,0.0,1.0
2017-08-04 00:00:00+00:00,156.39,157.4,155.69,156.07,20349532,36.64218,36.878823,36.47817,36.567204,81398128,0.0,1.0
2017-08-07 00:00:00+00:00,158.81,158.92,156.6701,157.06,21870321,37.209186,37.234959,36.707807,36.799161,87481284,0.0,1.0


In [7]:
# 일별 가치지표 (무료 계정은 다우존스 30 지수에 포함되는 종목만 제공)
fundamentals_daily = client.get_fundamentals_daily('AAPL')
fundamentals_daily_df = pd.DataFrame.from_records(fundamentals_daily)

fundamentals_daily_df.head()

Unnamed: 0,date,marketCap,enterpriseVal,peRatio,pbRatio,trailingPEG1Y
0,2021-03-09T00:00:00.000Z,2058663000000.0,2093880000000.0,32.201833,31.086361,0.922143
1,2021-03-10T00:00:00.000Z,2039876000000.0,2075093000000.0,31.907965,30.802673,0.913728
2,2021-03-11T00:00:00.000Z,2073540000000.0,2108757000000.0,32.434534,31.311002,0.928807
3,2021-03-12T00:00:00.000Z,2057728000000.0,2092945000000.0,32.187206,31.072241,0.921725
4,2021-03-15T00:00:00.000Z,2108053000000.0,2143270000000.0,32.974401,31.832167,0.944267


In [8]:
# financial statements (재무제표)
fundamentals_stmnts = client.get_fundamentals_statements(
    'AAPL', startDate='2019-01-01', asReported=True, fmt='csv'
)
# 텍스트 형태의 데이터를 클렌징을 통해 데이터프레임 형태로 변환
df_fs = pd.DataFrame([x.split(',') for x in fundamentals_stmnts.split('\n')])
# 첫번째 행을 열 이름 지정후 해당 행 삭제
df_fs.columns = df_fs.iloc[0]
df_fs = df_fs[1:]
# 'data' 열을 인덱스로 지정
df_fs.set_index('date', drop=True, inplace=True)
df_fs = df_fs[df_fs.index != '']

df_fs.head()

Unnamed: 0_level_0,year,quarter,statementType,dataCode,value
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-02-02,2024,1,cashFlow,depamor,2848000000.0
2024-02-02,2024,1,balanceSheet,ppeq,43666000000.0
2024-02-02,2024,1,balanceSheet,intangibles,0.0
2024-02-02,2024,1,incomeStatement,opinc,40373000000.0
2024-02-02,2024,1,incomeStatement,shareswa,15509763000.0


## 티커 수집하기

[https://www.investing.com/](https://www.investing.com/)

In [9]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
import math
import pandas as pd

driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
url = 'https://www.investing.com/stock-screener/?sp=country::5|sector::a|industry::a|equityType::a%3Ceq_market_cap;1'
# open url
driver.get(url)

In [10]:
# HTML 정보에 해당하는 driver.page_source를 BeautifulSoup 객체로 만들어준다.
html = BeautifulSoup(driver.page_source, 'lxml')

In [11]:
# 국가명 확인
html.find(class_='js-search-input inputDropDown')['value']

'United States'

In [12]:
html_table = html.select('table.genTbl.openTbl.resultsStockScreenerTbl.elpTbl')
print(html_table[0])

<table class="genTbl openTbl resultsStockScreenerTbl elpTbl" id="resultsTable">
<thead>
<tr>
<th class="flag"> </th>
<th class="symbol left js-sortable pointer" data-column-name="name_trans"><i title="Name">Name</i><span class="headerSortDefault"></span></th><th class="left js-sortable pointer" data-column-name="viewData.symbol"><i title="Symbol">Symbol</i><span class="headerSortDefault"></span></th><th class="left displayNone js-sortable pointer" data-column-name="exchange_trans"><i title="Exchange">Exchange</i><span class="headerSortDefault"></span></th><th class="left displayNone js-sortable pointer" data-column-name="sector_trans"><i title="Sector">Sector</i><span class="headerSortDefault"></span></th><th class="left displayNone js-sortable pointer" data-column-name="industry_trans"><i title="Industry">Industry</i><span class="headerSortDefault"></span></th><th class="js-sortable pointer" data-column-name="last"><i title="Last">Last</i><span class="headerSortDefault"></span></th><t

In [13]:
# 데이터프레임 변환
# prettify() 메서드를 이용해 BeautifulSoup에서 파싱한 파서 트리를 유니코드 형태로 다시 돌려준 후, read_html() 함수를 통해 테이블을 읽어준다.
df_table = pd.read_html(html_table[0].prettify())
df_table_result = df_table[0]
df_table_result.head()

Unnamed: 0.1,Unnamed: 0,Name,Symbol,Exchange,Sector,Industry,Last,Chg. %,Market Cap,Vol.,...,Bull/Bear Power (13 / 1D),CCI (14 / 1D),Highs/Lows (14 / 1D),ROC (1D),RSI (14 / 1D),STOCH (14 / 1D),STOCHRSI (14 / 1D),Ultimate Oscillator (14 /1D),Williams %R (1D),"var columnsSettings_stock_screener = new ColumnsSettings( 'ltr', // strHtmlDir 'resultsContainer', // containerId 'resultsContainer', // tab 'stock_screener', // id 'Currently selected: X (max 8)', // message '8', // intMaxCheckboxes '1' // intMinOptCheckboxes \t); Adjust table Name Symbol Exchange Sector Industry Last Chg. % Market Cap Vol. P/E Ratio MACD (12,26 / 1D) Revenue Average Vol. (3m) EPS Beta Dividend Yield 15 Minutes Hourly Daily Weekly Monthly Daily 1 Week 1 Month YTD 1 Year 3 Years 1-Year Change Dividend Yield (%) P/E Ratio (TTM) Price to Sales (TTM) Price to Cash Flow (MRQ) Price to Free Cash Flow (TTM) Price to Book (MRQ) Price to Tangible Book (MRQ) EPS(MRQ) vs Qtr. 1 Yr. Ago EPS(TTM) vs TTM 1 Yr. Ago 5 Year EPS Growth Sales (MRQ) vs Qtr. 1 Yr. Ago Sales (TTM) vs TTM 1 Yr. Ago (TTM) 5 Year Sales Growth 5 Year Capital Spending Growth Asset Turnover (TTM) Inventory Turnover (TTM) Revenue/Employee (TTM) Net Income/Employee (TTM) Receivable Turnover (TTM) 52 wk Range - High 52 wk Range - Low % Change from 52 wk High % Change from 52 wk Low Previous Month % Change Gross margin (TTM) Gross Margin (5YA) Operating margin (TTM) Operating margin (5YA) Pretax margin (TTM) Pretax margin (5YA) Net Profit margin (TTM) Net Profit margin (5YA) Quick Ratio (MRQ) Current Ratio (MRQ) LT Debt to Equity (MRQ) Total Debt to Equity Dividend Yield 5 Year Avg. (5YA) Dividend Growth Rate (ANN) Payout Ratio (TTM) ADX (14 / 1D) ATR (14 / 1D) Bull/Bear Power (13 / 1D) CCI (14 / 1D) Highs/Lows (14 / 1D) ROC (1D) RSI (14 / 1D) STOCH (14 / 1D) STOCHRSI (14 / 1D) Ultimate Oscillator (14 /1D) Williams %R (1D) Advanced Metrics Currently selected: 6 (max 8 ) Apply $('#colSelectPopup_stock_screener').click(function(e) { e.stopPropagation(); \t});"
0,,Microsoft,MSFT,NASDAQ,Technology,Software & IT Services,406.22,-0.71%,3.02T,16.71M,...,-1.98,-14.33,0.0,0.51,50.34,53.03,26.15,55.75,-55.79,
1,,Apple,AAPL,NASDAQ,Technology,"Computers, Phones & Household Electronics",170.73,1.02%,2.64T,74.64M,...,-9.95,-110.67,-5.61,-6.22,28.54,23.41,29.59,35.21,-85.02,
2,,NVIDIA,NVDA,NASDAQ,Technology,Semiconductors & Semiconductor Equipment,875.28,-5.55%,2.15T,110.55M,...,183.1,139.49,49.47,20.61,69.91,58.11,49.03,56.44,-31.49,
3,,Amazon.com,AMZN,NASDAQ,Consumer Cyclicals,Diversified Retail,175.35,-0.83%,1.82T,35.87M,...,3.9,70.49,0.0,3.41,58.79,51.17,23.6,47.78,-33.68,
4,,Alphabet C,GOOG,NASDAQ,Technology,Software & IT Services,136.29,0.78%,1.69T,26.00M,...,-2.07,-50.18,-1.17,-3.84,41.59,28.05,50.62,45.44,-67.44,


In [14]:
df_table_select = df_table[0][['Name', 'Symbol', 'Exchange', 'Sector', 'Market Cap']]
df_table_select.head()

Unnamed: 0,Name,Symbol,Exchange,Sector,Market Cap
0,Microsoft,MSFT,NASDAQ,Technology,3.02T
1,Apple,AAPL,NASDAQ,Technology,2.64T
2,NVIDIA,NVDA,NASDAQ,Technology,2.15T
3,Amazon.com,AMZN,NASDAQ,Consumer Cyclicals,1.82T
4,Alphabet C,GOOG,NASDAQ,Technology,1.69T


In [15]:
# 페이지 수 계산
end_num = driver.find_element(By.CLASS_NAME, value='js-total-results').text
print(math.ceil(int(end_num) / 50))

223


In [16]:
# 드라이버 종료
driver.quit()

## 전 종목 티커 크롤링

In [17]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC 
from selenium.webdriver.support.ui import WebDriverWait
from bs4 import BeautifulSoup
from datetime import datetime
import math
import pandas as pd
import numpy as np
from tqdm import tqdm
import time

# 크롬 드라이버 불러오기
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
# 국가코드 미국에 해당하는 '5'를 입력
nationcode = '5'
# 첫페이지 URL
url = f'''https://investing.com/stock-screener/?sp=country::
{nationcode}|sector::a|industry::a|equityType::ORD%3Ceq_market_cap;1'''
# 셀레니움으로 페이지 열기
driver.get(url)

# 'Screener Results'에 해당하는 부문은 종목이 들어있는 테이블이 로딩된 이후 나타난다. 
# WebDriverWait() 함수를 통해 해당 테이블이 로딩될 떄까지 기다린다.
# 테이블의 XPATH는 '//*[@id="resultsTable"]/tbody'
WebDriverWait(driver, 10).until(EC.visibility_of_element_located(
    (By.XPATH, '//*[@id="resultsTable"]/tbody')
))

# 종목수에 해당하는 부분을 크롤링한 후, 이를 통해 페이지 수를 계산
end_num = driver.find_element(By.CLASS_NAME, value='js-total-results').text
end_num = math.ceil(int(end_num) / 50)

In [18]:
all_data_df = []

# 전체 페이지에서 종목명과 티커 등의 정보를 크롤링

for i in tqdm(range(1, end_num + 1)):
    url = f'''https://investing.com/stock-screener/?sp=country::
        {nationcode}|sector::a|industry::a|equityType::ORD%3Ceq_market_cap;{i}'''
    driver.get(url)
    
    try:
        WebDriverWait(driver, 10).until(EC.visibility_of_element_located(
            (By.XPATH, '//*[@id="resultsTable"]/tbody')
        ))
    except:
        time.sleep(1)
        driver.refresh()
        WebDriverWait(driver, 10).until(EC.visibility_of_element_located(
            (By.XPATH, '//*[@id="resultsTable"]/tbody')
        ))
    
    html = BeautifulSoup(driver.page_source, 'lxml')
    html_table = html.select(
        'table.genTbl.openTbl.resultsStockScreenerTbl.elpTbl'
    )
    df_table = pd.read_html(html_table[0].prettify())
    df_table_select = df_table[0][['Name', 'Symbol', 'Exchange', 'Sector', 'Market Cap']]
    
    all_data_df.append(df_table_select)
    
    time.sleep(2)
    
# for 문이 끝나면 concat() 함수를 통해 리스트 내 모든 데이터프레임을 행으로 묶어 준다.
all_data_df_bind = pd.concat(all_data_df, axis=0)

data_country = html.find(class_='js-search-input inputDropDown')['value']
all_data_df_bind['country'] = data_country
all_data_df_bind['date'] = datetime.today().strftime('%Y-%m-%d')
# 일부 종목의 경우 종목명이 빈칸으로 들어오므로 이를 제거
all_data_df_bind = all_data_df_bind[~all_data_df_bind['Name'].isnull()]
# 거래 가능한 거래소만 선택
all_data_df_bind = all_data_df_bind[all_data_df_bind['Exchange'].isin(
    ['NASDAQ', 'NYSE', 'NYSE Amex']
)]
# 중복으로 들어오는 경우 1개만 남김
all_data_df_bind = all_data_df_bind.drop_duplicates(['Symbol'])
all_data_df_bind.reset_index(inplace=True, drop=True)
all_data_df_bind = all_data_df_bind.replace({np.nan: None})

driver.quit()

  0%|          | 0/166 [00:00<?, ?it/s]

100%|██████████| 166/166 [17:12<00:00,  6.22s/it] 


In [19]:
all_data_df_bind[1150:1159]

Unnamed: 0,Name,Symbol,Exchange,Sector,Market Cap,country,date
1150,Madison Square Garden Sports,MSGS,NYSE,Consumer Cyclicals,4.44B,United States,2024-03-10
1151,Clearway Energy A,CWENa,NYSE,Utilities,4.44B,United States,2024-03-10
1152,Clearway Energy C,CWEN,NYSE,Utilities,4.44B,United States,2024-03-10
1153,Herc Holdings,HRI,NYSE,Industrials,4.43B,United States,2024-03-10
1154,Algonquin Power,AQN,NYSE,Utilities,4.42B,United States,2024-03-10
1155,Freedom,FRHC,NASDAQ,Financials,4.42B,United States,2024-03-10
1156,American Equity Inv. Life,AEL,NYSE,Financials,4.41B,United States,2024-03-10
1157,American Eagle Outfitters,AEO,NYSE,Consumer Cyclicals,4.40B,United States,2024-03-10
1158,Iovance Biotherapeutics,IOVA,NASDAQ,Healthcare,4.39B,United States,2024-03-10


In [20]:
# insert into database
import pymysql

con = pymysql.connect(user=user,
              passwd=pw,
              host=host,
              db=db,
              charset='utf8')
mycursor = con.cursor()
query = """
INSERT INTO ticker_global (name, symbol, exchange, sector, market_cap, country, date)
VALUES (%s, %s, %s, %s, %s, %s, %s)
ON DUPLICATE KEY UPDATE
name=VALUES(name), exchange=VALUES(exchange), sector=VALUES(exchange), market_cap=VALUES(market_cap);
"""

args = all_data_df_bind.values.tolist()

mycursor.executemany(query, args)
con.commit()

con.close()

NameError: name 'user' is not defined

## 주가 다운로드

야후 파이낸스는 전 세계 주가를 제공하고 있다.
[https://finance.yahoo.com](https://finance.yahoo.com)

In [None]:
import yfinance as yf 

price = yf.download('AAPL')
price.head()

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1980-12-12,0.128348,0.128906,0.128348,0.128348,0.099319,469033600
1980-12-15,0.12221,0.12221,0.121652,0.121652,0.094137,175884800
1980-12-16,0.113281,0.113281,0.112723,0.112723,0.087228,105728000
1980-12-17,0.115513,0.116071,0.115513,0.115513,0.089387,86441600
1980-12-18,0.118862,0.11942,0.118862,0.118862,0.091978,73449600


In [None]:
# [1 of 1 completed] 출력하고 싶지 않을 시, progress=False인자를 추가
price = yf.download('AAPL', progress=False)

In [None]:
# start 인자를 추가하면 데이터 다운로드 기간을 변경 가능
price = yf.download('AAPL', start='2000-01-01', progress=False)
price.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2000-01-03,0.936384,1.004464,0.907924,0.999442,0.847207,535796800
2000-01-04,0.966518,0.987723,0.90346,0.915179,0.775779,512377600
2000-01-05,0.926339,0.987165,0.919643,0.928571,0.787131,778321600
2000-01-06,0.947545,0.955357,0.848214,0.848214,0.719014,767972800
2000-01-07,0.861607,0.901786,0.852679,0.888393,0.753073,460734400


In [None]:
# 일본 티커
price = yf.download('8035.T', progress= False)
price.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2000-01-04,13800.0,14000.0,12890.0,13010.0,11240.482422,231000
2000-01-05,11510.0,12140.0,11020.0,11950.0,10324.65332,672000
2000-01-06,12000.0,12600.0,11000.0,11020.0,9521.146484,688000
2000-01-07,10800.0,11530.0,10530.0,10920.0,9434.741211,1203000
2000-01-10,10920.0,10920.0,10920.0,10920.0,9434.741211,0


### 전 종목 주가 다운로드

In [None]:
# import packages
import pymysql
from sqlalchemy import create_engine
import pandas as pd
import yfinance as yf
import time
from tqdm import tqdm

user = 'root'
pw = '04250629'
host = '127.0.0.1'
db = 'stock'
port = '3306'

# connect DB
engine = create_engine(f'mysql+pymysql://{user}:{pw}@{host}:{port}/{db}')

con = pymysql.connect(
    user=user,
    passwd=pw,
    host=host,
    db=db,
    charset='utf8'
)
mycursor = con.cursor()

# select ticker lists
ticker_list = pd.read_sql(
    """ 
    SELECT * FROM ticker_global
    WHERE date = (SELECT MAX(date) FROM ticker_global)
    AND country = 'United States';
    """
, con=engine)

# insert into database
query = """ 
INSERT INTO price_global (date, high, low, open, close, volume, adj_close, ticker)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
ON DUPLICATE KEY UPDATE
high=VALUE(high), low=VALUES(low), open=VALUES(open), close=VALUES(close), volume=VALUES(volume), adj_close=VALUES(adj_close);
"""

In [None]:
# error list
error_list = []

# download all prices of all tickers
for i in tqdm(range(0, len(ticker_list))):
    
    # select a ticker
    ticker = ticker_list['symbol'][i]
    
    # if error happens, it will be ignored
    try:
        # download price
        price = yf.download(ticker, progress=False)
        
        # clean data
        price = price.reset_index()
        price['ticker'] = ticker
        
        # insert into DB
        args = price.values.tolist()
        mycursor.executemany(query, args)
        con.commit()
    
    except:
        print(ticker)
        error_list.append(ticker)
        
    time.sleep(2)

# close the connection
engine.dispose()
con.close()

  3%|▎         | 146/5286 [06:17<3:30:49,  2.46s/it]
1 Failed download:
['AGMA']: Exception('%ticker%: No timezone found, symbol may be delisted')
 12%|█▏        | 610/5286 [26:42<3:20:16,  2.57s/it]
1 Failed download:
['BFA']: Exception('%ticker%: No timezone found, symbol may be delisted')
 12%|█▏        | 613/5286 [26:50<3:17:41,  2.54s/it]
1 Failed download:
['BFB']: Exception('%ticker%: No timezone found, symbol may be delisted')
 12%|█▏        | 654/5286 [28:39<3:35:53,  2.80s/it]
1 Failed download:
['BIOB']: Exception('%ticker%: No timezone found, symbol may be delisted')
 14%|█▎        | 720/5286 [31:35<3:14:02,  2.55s/it]
1 Failed download:
['BNREA']: Exception('%ticker%: No timezone found, symbol may be delisted')
 14%|█▍        | 760/5286 [33:19<3:24:08,  2.71s/it]
1 Failed download:
['BRKA']: Exception('%ticker%: No timezone found, symbol may be delisted')
 14%|█▍        | 761/5286 [33:22<3:23:35,  2.70s/it]
1 Failed download:
['BRKB']: Exception('%ticker%: No timezone foun

## 재무제표 다운로드

In [None]:
from yahooquery import Ticker
import numpy as np

data = Ticker('AAPL')

In [None]:
# annual financial statements
data_y = data.all_financial_data(frequency='a')
data_y

Unnamed: 0_level_0,asOfDate,periodType,currencyCode,AccountsPayable,AccountsReceivable,AccumulatedDepreciation,AvailableForSaleSecurities,BasicAverageShares,BasicEPS,BeginningCashPosition,...,TotalEquityGrossMinorityInterest,TotalExpenses,TotalLiabilitiesNetMinorityInterest,TotalNonCurrentAssets,TotalNonCurrentLiabilitiesNetMinorityInterest,TotalOperatingIncomeAsReported,TotalRevenue,TradeandOtherPayablesNonCurrent,TreasurySharesNumber,WorkingCapital
symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AAPL,2020-09-30,12M,USD,42296000000.0,16120000000.0,-66760000000.0,100887000000.0,17352120000.0,3.31,50224000000.0,...,65339000000.0,208227000000.0,258549000000.0,180175000000.0,153157000000.0,66288000000.0,274515000000.0,28170000000.0,,38321000000.0
AAPL,2021-09-30,12M,USD,54763000000.0,26278000000.0,-70283000000.0,127877000000.0,16701270000.0,5.67,39789000000.0,...,63090000000.0,256868000000.0,287912000000.0,216166000000.0,162431000000.0,108949000000.0,365817000000.0,24689000000.0,,9355000000.0
AAPL,2022-09-30,12M,USD,64115000000.0,28184000000.0,-72340000000.0,120805000000.0,16215960000.0,6.15,35929000000.0,...,50672000000.0,274891000000.0,302083000000.0,217350000000.0,148101000000.0,119437000000.0,394328000000.0,16657000000.0,,-18577000000.0
AAPL,2023-09-30,12M,USD,62611000000.0,29508000000.0,,100544000000.0,15744230000.0,6.16,24977000000.0,...,62146000000.0,268984000000.0,290437000000.0,209017000000.0,145129000000.0,114301000000.0,383285000000.0,,0.0,-1742000000.0


In [None]:
# clean data
data_y.reset_index(inplace=True)
data_y = data_y.loc[:, ~data_y.columns.isin(['periodType', 'currencyCode'])]
data_y = data_y.melt(id_vars=['symbol', 'asOfDate'])
data_y = data_y.replace([np.nan], None)
data_y['freq'] = 'y'
data_y.columns = ['ticker', 'date', 'account', 'value', 'freq']

data_y.head()

Unnamed: 0,ticker,date,account,value,freq
0,AAPL,2020-09-30,AccountsPayable,42296000000.0,y
1,AAPL,2021-09-30,AccountsPayable,54763000000.0,y
2,AAPL,2022-09-30,AccountsPayable,64115000000.0,y
3,AAPL,2023-09-30,AccountsPayable,62611000000.0,y
4,AAPL,2020-09-30,AccountsReceivable,16120000000.0,y


In [None]:
# quaterly financial statements
data_q = data.all_financial_data(frequency='q')
data_q.reset_index(inplace=True)
data_q = data_q.loc[:, ~data_q.columns.isin(['periodType', 'currencyCode'])]
data_q = data_q.melt(id_vars=['symbol', 'asOfDate'])
data_q = data_q.replace([np.nan], None)
data_q['freq'] = 'q'
data_q.columns = ['ticker', 'date', 'account', 'value', 'freq']

data_q.head()

Unnamed: 0,ticker,date,account,value,freq
0,AAPL,2022-09-30,AccountsPayable,64115000000.0,q
1,AAPL,2022-12-31,AccountsPayable,57918000000.0,q
2,AAPL,2023-03-31,AccountsPayable,42945000000.0,q
3,AAPL,2023-06-30,AccountsPayable,46699000000.0,q
4,AAPL,2023-09-30,AccountsPayable,62611000000.0,q


### 전 종목 재무제표 다운로드

In [None]:
from sqlalchemy import create_engine
import pymysql
import pandas as pd
from yahooquery import Ticker
import time
from tqdm import tqdm
import numpy as np
import keyring

# connect Database

user = 'root'
pw = keyring.get_password('local_db', user)
host = '127.0.0.1'
port = '3306'
db = 'stock'

engine = create_engine(f'mysql+pymysql://{user}:{pw}@{host}:{port}/{db}')
con = pymysql.connect(
    user=user,
    passwd=pw,
    host=host,
    db=db,
    charset='utf8'
)
mycursor = con.cursor()

In [None]:
con.close()
engine.dispose()

In [None]:
# select ticker list
ticker_list = pd.read_sql(
    """ 
    SELECT * FROM ticker_global
    WHERE date = (SELECT MAX(date) FROM ticker_global)
    AND country='United States';
    """
, con=engine)

In [None]:
# insert query
query_fs = """ 
    INSERT INTO fs_global (ticker, date, account, value, freq)
    VALUES (%s, %s, %s, %s, %s)
    ON DUPLICATE KEY UPDATE
    value = VALUES(value);
"""

# error list
error_list = []

# download all fs
for i in tqdm(range(0, len(ticker_list))):
    # select ticker
    ticker = ticker_list['symbol'][i]
    
    try:
        # download data
        data = Ticker(ticker)
        
        # yearly fs
        data_y = data.all_financial_data(frequency='a')
        data_y.reset_index(inplace=True)
        data_y = data_y.loc[:, ~data_y.columns.isin(['periodType', 'currencyCode'])]
        data_y = data_y.melt(id_vars= ['symbol', 'asOfDate'])
        data_y = data_y.replace([np.nan], None)
        data_y['freq'] = 'y'
        data_y.columns = ['ticker', 'date', 'account', 'value', 'freq']
        
        # quaterly fs
        data_q = data.all_financial_data(frequency='q')
        data_q.reset_index(inplace=True)
        data_q = data_q.loc[:, ~data_q.columns.isin(['periodType', 'currencyCode'])]
        data_q = data_q.melt(id_vars= ['symbol', 'asOfDate'])
        data_q = data_q.replace([np.nan], None)
        data_q['freq'] = 'q'
        data_q.columns = ['ticker', 'date', 'account', 'value', 'freq']
        
        # concat
        data_fs = pd.concat([data_y, data_q], axis=0)
        
        # insert into db
        args = data_fs.values.tolist()
        mycursor.executemany(query_fs, args)
        con.commit()
    
    except:
        print(ticker)
        error_list.append(ticker)
        
    time.sleep(2)

# close db connection
engine.dispose()
con.close()
  

  3%|▎         | 146/5286 [09:40<5:36:48,  3.93s/it]

AGMa


  4%|▍         | 214/5286 [14:06<5:38:29,  4.00s/it]

ALLG


  7%|▋         | 359/5286 [23:33<5:31:00,  4.03s/it]

ARBB


  7%|▋         | 394/5286 [25:51<5:32:36,  4.08s/it]

ARVL


 11%|█         | 572/5286 [37:35<5:37:17,  4.29s/it]

BCAL


 12%|█▏        | 610/5286 [40:06<5:07:05,  3.94s/it]

BFa


 12%|█▏        | 613/5286 [40:17<5:01:00,  3.86s/it]

BFb


 12%|█▏        | 632/5286 [41:31<5:09:44,  3.99s/it]

BHa


 12%|█▏        | 634/5286 [41:37<4:37:13,  3.58s/it]

BHAT


 12%|█▏        | 654/5286 [42:57<5:17:34,  4.11s/it]

BIOb


 14%|█▎        | 720/5286 [47:17<5:03:29,  3.99s/it]

BNREa


 14%|█▍        | 738/5286 [48:26<4:58:15,  3.93s/it]

BOWN


 14%|█▍        | 760/5286 [49:52<4:47:22,  3.81s/it]

BRKa


 14%|█▍        | 761/5286 [49:55<4:25:49,  3.52s/it]

BRKb


 15%|█▌        | 808/5286 [53:00<5:06:41,  4.11s/it]

BVFL


 16%|█▋        | 866/5286 [56:53<5:00:36,  4.08s/it]

CAPT


 22%|██▏       | 1168/5286 [1:17:10<4:36:40,  4.03s/it]

CPBI


 23%|██▎       | 1195/5286 [1:19:00<4:40:09,  4.11s/it]

CRDa


 23%|██▎       | 1196/5286 [1:19:03<4:13:41,  3.72s/it]

CRDb


 25%|██▍       | 1313/5286 [1:27:00<4:25:56,  4.02s/it]

CWENa


 30%|██▉       | 1585/5286 [1:45:38<4:03:20,  3.94s/it]

ELEP


 38%|███▊      | 2006/5286 [2:14:05<3:48:56,  4.19s/it]

GEFb


 38%|███▊      | 2017/5286 [2:14:49<3:49:15,  4.21s/it]

GETR


 39%|███▉      | 2065/5286 [2:18:05<3:34:05,  3.99s/it]

GLTA


 41%|████      | 2155/5286 [2:24:10<3:26:38,  3.96s/it]

GTH


 41%|████      | 2161/5286 [2:24:33<3:29:41,  4.03s/it]

GTNa


 42%|████▏     | 2215/5286 [2:28:07<3:27:37,  4.06s/it]

HEIa


 44%|████▍     | 2337/5286 [2:36:29<3:23:19,  4.14s/it]

HVTa


 46%|████▌     | 2429/5286 [2:42:46<3:07:40,  3.94s/it]

IMOS


 46%|████▋     | 2455/5286 [2:44:31<3:15:19,  4.14s/it]

INHD


 47%|████▋     | 2508/5286 [2:48:10<3:00:03,  3.89s/it]

IPXX


 51%|█████     | 2708/5286 [3:01:50<2:56:19,  4.10s/it]

KUKE


 52%|█████▏    | 2728/5286 [3:03:09<2:48:11,  3.95s/it]

LAC_w


 53%|█████▎    | 2776/5286 [3:06:25<2:56:45,  4.23s/it]

LENb


 53%|█████▎    | 2790/5286 [3:07:21<2:52:37,  4.15s/it]

LGCL


 53%|█████▎    | 2791/5286 [3:07:24<2:38:03,  3.80s/it]

LGFa


 53%|█████▎    | 2792/5286 [3:07:27<2:26:31,  3.53s/it]

LGFb


 54%|█████▎    | 2832/5286 [3:10:06<2:51:21,  4.19s/it]

LLYVA


 54%|█████▎    | 2833/5286 [3:10:09<2:34:42,  3.78s/it]

LLYVK


 58%|█████▊    | 3051/5286 [3:24:57<2:27:09,  3.95s/it]

MHLA


 58%|█████▊    | 3072/5286 [3:26:20<2:26:26,  3.97s/it]

MKCv


 59%|█████▉    | 3114/5286 [3:29:11<2:32:41,  4.22s/it]

MNY


 59%|█████▉    | 3124/5286 [3:29:50<2:22:16,  3.95s/it]

MOGa


 59%|█████▉    | 3125/5286 [3:29:53<2:11:17,  3.65s/it]

MOGb


 62%|██████▏   | 3284/5286 [3:40:49<2:13:33,  4.00s/it]

NETD


 64%|██████▍   | 3387/5286 [3:47:47<2:11:52,  4.17s/it]

NTBL


 65%|██████▍   | 3422/5286 [3:50:09<2:09:53,  4.18s/it]

NVNI


 67%|██████▋   | 3562/5286 [3:59:39<2:01:17,  4.22s/it]

OPTX


 71%|███████   | 3729/5286 [4:11:05<1:45:11,  4.05s/it]

PHGE_u


 76%|███████▌  | 4022/5286 [4:31:06<1:23:10,  3.95s/it]

RF_pc


 77%|███████▋  | 4053/5286 [4:33:10<1:21:13,  3.95s/it]

RLND


 84%|████████▎ | 4415/5286 [4:58:00<1:00:46,  4.19s/it]

SPAQ


 85%|████████▍ | 4493/5286 [5:03:22<57:50,  4.38s/it]  

STHO


 87%|████████▋ | 4577/5286 [5:09:09<47:58,  4.06s/it]

TAPa


 92%|█████████▏| 4851/5286 [5:28:09<31:10,  4.30s/it]

UHALb


 92%|█████████▏| 4889/5286 [5:30:44<27:02,  4.09s/it]

US90275F1966=UBSS


 96%|█████████▌| 5073/5286 [5:43:13<14:00,  3.94s/it]

WALD


 96%|█████████▌| 5082/5286 [5:43:49<14:00,  4.12s/it]

WBUY


 98%|█████████▊| 5167/5286 [5:49:37<08:14,  4.16s/it]

WSOb


 99%|█████████▉| 5249/5286 [5:55:15<02:35,  4.19s/it]

ZAPP


100%|██████████| 5286/5286 [5:57:47<00:00,  4.06s/it]
