In [1]:
import json
import requests
import pandas as pd
import re
import string

In [66]:
# 데이터 불러오기
headers = {'User-Agent' : 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36'}
limit = 0
api_url = f'https://api.nasdaq.com/api/screener/stocks?tableonly=true&limit={limit}'
try:
    res = requests.get(api_url, headers=headers)
except Exception as e:
    print(e)

In [67]:
# deserialization
data = json.loads(res.text)

In [68]:
# 상장 종목 수 체크
num_of_listed_comp = data.get('data').get('totalrecords')

In [69]:
print(f"number of listed stocks : {num_of_listed_comp}")

number of listed stocks : 7863


In [70]:
# 상장 종목 리스트
stock_info = data['data']['table']['rows']
stock_info = pd.DataFrame(stock_info)
# 상장 종목 symbol이랑 name만 남김
stock_list = stock_info.iloc[:,:2]

In [71]:
# 정규표현식 설명
# \,| : comma(,) 매칭
# \s\(([\w].*)\)*| : 괄호로 쌓여있는 단어 매칭
# (\snew)?(\s[\w]*)?(\sstock|\sshare).*| : 'new 단어 stock', 'new 단어 share', '단어 stock', '단어 share' 뒤 모든 문자 매칭
# ((?<=inc\.)|(?<=inc))\s(\w|\W).*|((?<=ltd\.)|(?<=ltd))\s(\w|\W).*| : inc나 ltd 뒤 모든 문자 매칭
# american\sd.+(share+s?|receipt).* : 'american d로시작하는단어 shares 또는 receipt' 문자 매칭
# flag : 대소문자 무시

pattern = r"\,|\s\(([\w].*)\)*|(\snew)?(\s[\w]*)?(\sstock|\sshare).*|((?<=inc\.)|(?<=inc))\s(\w|\W).*|((?<=ltd\.)|(?<=ltd))\s(\w|\W).*|american\sd.+(share+s?|receipt).*"
regex = re.compile(pattern, flags=re.IGNORECASE)

In [72]:
for index, row in stock_list.iterrows():
    # %가 포함 종목 제거
    if re.search(r'\%',row['name']):
        stock_list = stock_list.drop(index=index)
        continue

    # 필요 없는 문자 제거
    row['name'] = regex.sub('', row['name'])

In [80]:
stock_list

Unnamed: 0,symbol,name
0,AAPL,Apple Inc.
1,MSFT,Microsoft Corporation
2,AMZN,Amazon.com Inc.
3,GOOG,Alphabet Inc.
4,GOOGL,Alphabet Inc.
...,...,...
7858,ZNTEU,Zanite Acquisition Corp. Unit
7859,ZNTEW,Zanite Acquisition Corp. Warrant
7860,ZTAQU,Zimmer Energy Transition Acquisition Corp. Units
7861,ZWRKU,Z-Work Acquisition Corp. Units


In [82]:
o = pd.read_csv("comp_list.csv")
o = o.iloc[:,1:3]

In [75]:
o.head()

Unnamed: 0,symbol,name
0,AAPL,Apple Inc.
1,MSFT,Microsoft Corporation
2,GOOG,Alphabet Inc.
3,AMZN,Amazon.com Inc.
4,GOOGL,Alphabet Inc.


In [76]:
# 상장 종목 업데이트
old_stock_list = o # db에서 불러온 종목 리스트. (지금은 임의로 할당. 나중에 수정 필요)
new_stock_list = stock_list
try:
    merged_stock_list = pd.merge(old_stock_list, new_stock_list, how='outer')
except Exception as e:
    print(e)

In [81]:
merged_stock_list.loc[merged_stock_list['symbol'] == 'AMH']

Unnamed: 0,symbol,name
794,AMH,
7549,AMH,American Homes 4 Rent


In [78]:
# 추가된 종목
updated_stock_list = pd.concat([old_stock_list, merged_stock_list]).drop_duplicates(keep=False)
updated_stock_list

Unnamed: 0,symbol,name
7543,TTE,TotalEnergies SE
7544,AMX,America Movil S.A.B. de C.V.
7545,YMM,Full Truck Alliance Co. Ltd.
7546,GRUB,Just Eat Takeaway.com N.V.
7547,BZ,KANZHUN LIMITED
...,...,...
11110,ZGYHU,Yunhong International Unit
11111,ZIONO,"Zions Bancorporation, N.A. Dep Shs Repstg 1/40..."
11112,ZIONP,"Zions Bancorporation, N.A. Depositary Shares (..."
11113,ZIVOW,"Zivo Bioscience, Inc. Warrants"


In [79]:
# 없어진 종목
removed_stock_list = pd.concat([new_stock_list, merged_stock_list]).drop_duplicates(keep=False)
removed_stock_list

Unnamed: 0,symbol,name
92,TOT,Total SE
240,AMX,America Movil S.A.B. de C.V. American Deposito...
794,AMH,
1081,TCF,TCF Financial Corporation
1219,CLGX,CoreLogic Inc.
...,...,...
7532,YSACW,Yellowstone Acquisition Company Warrants to pu...
7535,ZIONN,Zions Bancorporation N.A. Dep Shs Repstg 1/40t...
7536,ZIONO,Zions Bancorporation N.A. Dep Shs Repstg 1/40t...
7537,ZIONP,Zions Bancorporation N.A.
