In [2]:
import json
import requests
import pandas as pd
import re
import string

In [3]:
# 데이터 불러오기
headers = {'User-Agent' : 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36'}
limit = 0
api_url = f'https://api.nasdaq.com/api/screener/stocks?tableonly=true&limit={limit}'
try:
    res = requests.get(api_url, headers=headers)
except Exception as e:
    print(e)

In [4]:
# deserialization
data = json.loads(res.text)

In [5]:
# 상장 종목 수 체크
num_of_listed_comp = data.get('data').get('totalrecords')

In [6]:
print(f"number of listed stocks : {num_of_listed_comp}")

number of listed stocks : 7752


In [7]:
# 상장 종목 리스트
stock_info = data['data']['table']['rows']
stock_info = pd.DataFrame(stock_info)
# 상장 종목 symbol이랑 name만 남김
stock_list = stock_info.iloc[:,:2]

In [8]:
# 정규표현식 설명
# \,| : comma(,) 매칭
# \s\(([\w].*)\)*| : 괄호로 쌓여있는 단어 매칭
# (\snew)?(\s[\w]*)?(\sstock|\sshare).*| : 'new 단어 stock', 'new 단어 share', '단어 stock', '단어 share' 뒤 모든 문자 매칭
# ((?<=inc\.)|(?<=inc))\s(\w|\W).*|((?<=ltd\.)|(?<=ltd))\s(\w|\W).*| : inc나 ltd 뒤 모든 문자 매칭
# (american.+[\w].share).* : 'american 단어 share' 뒤 모든 문자 매칭
# flag : 대소문자 무시

pattern = r'''
    \,|
    \s\(([\w].*)\)*|
    (\snew)?(\s[\w]*)?(\sstock|\sshare).*|
    ((?<=inc\.)|(?<=inc))\s(\w|\W).*|((?<=ltd\.)|
    (?<=ltd))\s(\w|\W).*|
    (american.+[\w].share+s?).*
    '''
regex = re.compile(pattern, flags=re.IGNORECASE)

In [9]:
for index, row in stock_list.iterrows():
    # %가 포함 종목 제거
    if re.search(r'\%',row['name']):
        stock_list = stock_list.drop(index=index)
        continue

    # 필요 없는 문자 제거
    row['name'] = regex.sub('', row['name'])

In [10]:
stock_list

Unnamed: 0,symbol,name
0,AAPL,Apple Inc. Common Stock
1,MSFT,Microsoft Corporation Common Stock
2,GOOG,Alphabet Inc. Class C Capital Stock
3,AMZN,"Amazon.com, Inc. Common Stock"
4,GOOGL,Alphabet Inc. Class A Common Stock
...,...,...
7747,ZIVOW,"Zivo Bioscience, Inc. Warrants"
7748,ZNTEU,Zanite Acquisition Corp. Unit
7749,ZNTEW,Zanite Acquisition Corp. Warrant
7750,ZWRKU,Z-Work Acquisition Corp. Units


In [87]:
# 상장 종목 업데이트
old_stock_list = float('nan') # db에서 불러온 종목 리스트
new_stock_list = stock_list
try:
    merged_stock_list = pd.merge(old_stock_list, new_stock_list, how='outer')
except Exception as e:
    print(e)

Can only merge Series or DataFrame objects, a <class 'float'> was passed


In [61]:
# 추가된 종목
updated_stock_list = pd.concat([old_df, merged_stock_list]).drop_duplicates(keep=False)
updated_stock_list

Unnamed: 0,symbol,name
3,AMZN,Amazon.com Inc.
4,FB,Facebook Inc.


In [64]:
# 없어진 종목
removed_stock_list = pd.concat([new_df, merged_stock_list]).drop_duplicates(keep=False)
removed_stock_list

Unnamed: 0,symbol,name
0,AAPL,Apple Inc.
