In [1]:
import requests

import pandas as pd

import re

In [35]:
# Daum Finance sector codes to crawl, in the order they are processed.
# The trailing comment on each entry is the sector's name.
sector_codes = [
    'D0011013', # Electrical & Electronics
    'D0011006', # Textiles & Apparel
    'D0011015', # Transportation Equipment
    'D0011022', # Banks
    'D0011011', # Steel & Metals
    'D0011007', # Paper & Wood
    'D0011024', # Securities
    'D0011016', # Distribution
    'D0011010', # Non-metallic Minerals
    'D0011017', # Electricity & Gas
    'D0011005', # Food & Beverages
    'D0011009', # Pharmaceuticals
    'D0011014', # Medical & Precision Instruments
    'D0011026', # Services
    'D0011012', # Machinery
    'D0011021', # Financials
    'D0011025', # Insurance
    'D0011018', # Construction
    'D0011008', # Chemicals
    'D0011020', # Telecommunications
    'D0011019', # Transportation & Storage
]

In [36]:
# Scratch check: sorted() returns a new list in ascending order and keeps
# duplicate values (45 appears twice in the result).
a = [1,45,2,67,45,3]
sorted(a)

[1, 2, 3, 45, 45, 67]

In [37]:
def get_DAUM_sector_info(sector_code, sort_field='accTradePrice', reverse=True, page=1, timeout=10):
    """Fetch one page of the stocks included in a Daum Finance sector.

    Parameters
    ----------
    sector_code : str
        Sector identifier such as ``'D0011021'``. It is used in the URL path,
        as the ``symbolCode`` query parameter and in the ``referer`` header.
    sort_field : str, default 'accTradePrice'
        Value sent as the ``fieldName`` query parameter (the sort column).
    reverse : bool, default True
        ``True`` requests ``order=desc``; ``False`` requests ``order=asc``.
    page : int, default 1
        Page number to request; each page holds at most 100 stocks
        (``perPage=100``).
    timeout : float or tuple, default 10
        Timeout in seconds handed to ``requests.get`` so that a stalled
        connection cannot hang the notebook forever.

    Returns
    -------
    dict or None
        The decoded JSON body when the server answers with HTTP 200.
        For any other status the response object is printed and ``None``
        is returned.

    Raises
    ------
    requests.exceptions.RequestException
        Propagated from ``requests.get`` on connection errors or timeouts.
    """
    order = 'desc' if reverse else 'asc'

    # Up to 100 stocks per page, ordered by `sort_field`. The query string is
    # passed via `params` so that requests takes care of URL-encoding.
    request_url = f'https://finance.daum.net/api/sectors/{sector_code}/includedStocks'
    params = {
        'symbolCode': sector_code,
        'perPage': 100,
        'fieldName': sort_field,
        'order': order,
        'page': page,
        'pagination': 'true',
    }
    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36",
        "referer": f"https://finance.daum.net/domestic/sectors/{sector_code}",
    }
    response = requests.get(request_url, params=params, headers=headers, timeout=timeout)

    if response.status_code == 200:
        return response.json()

    # Non-200 answer: show the response for debugging and signal failure.
    print(response)
    return None
    


In [38]:
sec_code = 'D0011021' # Financials sector; noted as having up to 5 pages

In [39]:
r = get_DAUM_sector_info(sec_code, page=2) # requesting a page past the end returns the last page

In [40]:
# Number of stock entries returned for the requested page.
len(r['includedStocks'])

36

In [41]:
def sectordata2df(res_json, save=False):
    """Turn one sector API response into a (sector name, DataFrame) pair.

    Parameters
    ----------
    res_json : dict
        Response payload; must provide the keys ``'sectorName'``,
        ``'symbolCode'`` and ``'includedStocks'``.
    save : bool, default False
        When true, the frame is also pickled to
        ``<symbolCode>_<cleaned sector name>.pkl`` in the working directory.

    Returns
    -------
    tuple
        ``(cleaned_name, frame)`` where ``cleaned_name`` is the sector name
        with every character outside U+3131..U+D7A3, ASCII letters and ASCII
        digits removed, and ``frame`` is ``pd.DataFrame`` built from the
        ``'includedStocks'`` records.
    """
    raw_name = res_json['sectorName']
    code = res_json['symbolCode']
    stocks = res_json['includedStocks']

    # Strip anything that is not in the allowed character class so the name
    # can be used as part of a file name.
    clean_name = re.compile('[^ㄱ-힣A-Za-z0-9]').sub('', raw_name)

    frame = pd.DataFrame(stocks)

    if save:
        frame.to_pickle(f'{code}_{clean_name}.pkl')

    return clean_name, frame

In [53]:
# Crawl every sector: fetch its pages, merge them, write one pickle per
# sector, then stack all sectors into a single frame.
sector_dfs = []
for sector_code in sector_codes:
    dfs = []
    sectorname = None  # reset so a failed sector never reuses the previous name

    # Pages 1-3 (100 rows each) are requested for every sector. Sectors with
    # fewer pages yield repeated rows, which drop_duplicates() removes below.
    for page in range(1, 4):
        res_json = get_DAUM_sector_info(sector_code, page=page)
        if res_json is None:
            # get_DAUM_sector_info returns None on a non-200 response; skip
            # this page instead of crashing inside sectordata2df.
            continue
        sectorname, df = sectordata2df(res_json, save=False)
        dfs.append(df)

    if not dfs:
        # Every request for this sector failed; nothing to merge or save.
        print(f'{sector_code}: no data fetched, skipping')
        continue

    all_df = pd.concat(dfs, ignore_index=True)
    all_df = all_df.drop_duplicates()

    # The per-sector pickle is written before the 'sector' column is added,
    # so the file on disk does not contain that column.
    filename = f'{sector_code}_{sectorname}.pkl'
    all_df.to_pickle(filename)

    all_df['sector'] = sectorname
    sector_dfs.append(all_df)
    print(f'{sectorname}')

all_sector_df = pd.concat(sector_dfs, ignore_index=True)
all_sector_df = all_sector_df.drop_duplicates()

all_sector_df

전기전자
섬유의복
운수장비
은행
철강및금속
종이목재
증권
유통업
비금속광물
전기가스업
음식료품
의약품
의료정밀
서비스업
기계
금융업
보험
건설업
화학
통신업
운수창고


Unnamed: 0,name,code,symbolCode,tradePrice,change,changePrice,changeRate,accTradeVolume,accTradePrice,marketCap,foreignRatio,sector
0,LG에너지솔루션,KR7373220003,A373220,415500.0,RISE,22000.0,0.055909,1653323.0,6.761996e+11,9.722700e+13,3.670,전기전자
1,삼성전자,KR7005930003,A005930,61900.0,RISE,100.0,0.001618,10699077.0,6.642256e+11,3.695295e+14,49.820,전기전자
2,SK하이닉스,KR7000660001,A000660,99400.0,FALL,600.0,-0.006000,3345588.0,3.339564e+11,7.236344e+13,49.910,전기전자
3,LG이노텍,KR7011070000,A011070,356500.0,RISE,4000.0,0.011348,419743.0,1.525820e+11,8.437324e+12,26.430,전기전자
4,삼성SDI,KR7006400006,A006400,578000.0,RISE,22000.0,0.039568,257067.0,1.464696e+11,3.974590e+13,42.900,전기전자
...,...,...,...,...,...,...,...,...,...,...,...,...
952,대한항공우,KR7003491008,A003495,31500.0,EVEN,0.0,0.000000,892.0,2.816730e+07,3.499001e+10,1.410,운수창고
953,동양고속,KR7084670009,A084670,16100.0,EVEN,0.0,0.000000,625.0,1.005710e+07,4.661866e+10,0.000,운수창고
954,천일고속,KR7000650002,A000650,64800.0,RISE,200.0,0.003096,72.0,4.628700e+06,9.261346e+10,0.050,운수창고
955,세방우,KR7004361002,A004365,6880.0,FALL,50.0,-0.007215,420.0,2.892560e+06,2.538204e+10,0.000,운수창고


In [54]:
# List the column labels of the combined frame.
all_sector_df.columns

Index(['name', 'code', 'symbolCode', 'tradePrice', 'change', 'changePrice',
       'changeRate', 'accTradeVolume', 'accTradePrice', 'marketCap',
       'foreignRatio', 'sector'],
      dtype='object')

In [55]:
# Summarise dtype and non-null count for each column of the combined frame.
all_sector_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 957 entries, 0 to 956
Data columns (total 12 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   name            957 non-null    object 
 1   code            957 non-null    object 
 2   symbolCode      957 non-null    object 
 3   tradePrice      957 non-null    float64
 4   change          957 non-null    object 
 5   changePrice     957 non-null    float64
 6   changeRate      957 non-null    float64
 7   accTradeVolume  957 non-null    float64
 8   accTradePrice   957 non-null    float64
 9   marketCap       957 non-null    float64
 10  foreignRatio    957 non-null    object 
 11  sector          957 non-null    object 
dtypes: float64(6), object(6)
memory usage: 97.2+ KB


In [56]:
# 'sid' is symbolCode without its first character (e.g. 'A005930' -> '005930').
# The vectorised .str accessor is used instead of apply(lambda ...) so that a
# missing symbolCode yields NaN rather than raising a TypeError.
all_sector_df['sid'] = all_sector_df['symbolCode'].str[1:]

In [57]:
# Persist the combined all-sector frame to a pickle file in the working directory.
all_sector_df.to_pickle('all_sector_df.pkl')

In [45]:
# Load the per-sector pickle for sector code D0011021 back from disk to inspect it.
# NOTE(review): this cell's execution count (45) precedes the crawl cell (53),
# so the file was presumably written by an earlier run — confirm it exists
# when running the notebook top to bottom on a fresh kernel.
pd.read_pickle('D0011021_금융업.pkl')

Unnamed: 0,name,code,symbolCode,tradePrice,change,changePrice,changeRate,accTradeVolume,accTradePrice,marketCap,foreignRatio
0,KB금융,KR7105560007,A105560,47650.0,FALL,50.0,-0.001048,1058309,50641965350,1.964860e+13,72.760
1,하나금융지주,KR7086790003,A086790,36800.0,RISE,700.0,0.019391,1256264,46069938550,1.088925e+13,72.500
2,카카오뱅크,KR7323410001,A323410,30050.0,RISE,150.0,0.005017,1230340,37293157250,1.431334e+13,12.330
3,카카오페이,KR7377300009,A377300,61600.0,FALL,2700.0,-0.041991,541334,34092731100,8.163781e+12,43.470
4,신한지주,KR7055550008,A055550,35700.0,RISE,50.0,0.001403,922172,32900002100,1.831175e+13,61.940
...,...,...,...,...,...,...,...,...,...,...,...
131,신영증권우,KR7001721000,A001725,57500.0,EVEN,0.0,0.000000,27,1547300,4.055914e+11,0.000
132,하이트진로홀딩스우,KR7000141002,A000145,14800.0,EVEN,0.0,0.000000,58,856200,6.967988e+09,0.000
133,삼양홀딩스우,KR7000071001,A000075,60500.0,FALL,100.0,-0.001650,14,841800,1.839551e+10,0.000
134,부국증권우,KR7001271006,A001275,20100.0,RISE,100.0,0.005000,8,160750,6.030000e+10,0.000
