In [1]:
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load the growth-stock dataset. The ticker column is read as object (string)
# so that codes such as '005930' keep their leading zeros.
data_growth = pd.read_csv(
    '../datasets/data_growth.csv',
    index_col=0,
    dtype={'거래소코드': 'object'},
)

In [3]:
# Preview the first rows to check the loaded columns.
data_growth.head()

Unnamed: 0,year,회사명,거래소코드,회계년도,시가총액(백만원),PER(최고),PER(최저),1주당순이익(원)
5384,2008,삼성전자(주),5930,2007/12,66432000.0,15.74,11.46,49532.0
5385,2008,포스코홀딩스(주),5490,2007/12,33131000.0,18.13,6.74,48444.0
5386,2008,한국전력공사(주),15760,2007/12,18990400.0,19.86,15.08,2504.0
5387,2008,에스케이텔레콤(주),17670,2007/12,16969490.0,13.77,9.27,22607.0
5388,2008,에이치디한국조선해양(주),9540,2007/12,15162000.0,24.08,5.25,27778.0


### (성장주) - PEG 구현

- 데이터프레임명 : `data_growth`

- 파생변수 생성(데이터프레임명: `df_peg`)
    - `1Y_EPS` : EPS 증가율(=(올해EPS - 작년EPS) / 작년EPS)
    - `3Y_EPS_mean` : 3년 평균 EPS 증가율(= 기하평균(올해EPS증가율+1, 작년EPS증가율+1, 재작년EPS증가율+1) - 1)
    - `PEG` : PER(최고) / 3년 평균 EPS 증가율(%)

- 조건식 생성
    - `PEG`가 양수이고, `PEG` <= 0.75인 종목

- 유의사항
    - `3Y_EPS_mean` 결측치 제거로 인해 PEG 포트폴리오는 2011년부터 생성 가능

In [4]:
# Display the whole frame (the notebook renders a truncated head/tail view).
data_growth

Unnamed: 0,year,회사명,거래소코드,회계년도,시가총액(백만원),PER(최고),PER(최저),1주당순이익(원)
5384,2008,삼성전자(주),005930,2007/12,6.643200e+07,15.74,11.46,49532.0
5385,2008,포스코홀딩스(주),005490,2007/12,3.313100e+07,18.13,6.74,48444.0
5386,2008,한국전력공사(주),015760,2007/12,1.899040e+07,19.86,15.08,2504.0
5387,2008,에스케이텔레콤(주),017670,2007/12,1.696949e+07,13.77,9.27,22607.0
5388,2008,에이치디한국조선해양(주),009540,2007/12,1.516200e+07,24.08,5.25,27778.0
...,...,...,...,...,...,...,...,...
17333,2023,제이준코스메틱(주),025620,2022/12,2.426302e+04,0.00,0.00,-7343.0
17334,2023,(주)대유플러스,000300,2022/12,2.086506e+04,0.00,0.00,-407.0
17335,2023,비케이탑스(주),030790,2022/12,1.920832e+04,0.00,0.00,-378.0
17336,2023,(주)코아스,071950,2022/12,1.863705e+04,0.00,0.00,-72.0


In [5]:
# Distinct ticker codes, in order of first appearance in the dataset.
co_name = data_growth['거래소코드'].drop_duplicates().tolist()
(len(co_name), co_name[:5])

(863, ['005930', '005490', '015760', '017670', '009540'])

In [6]:
# Collect the tickers whose yearly records are not consecutive. The EPS growth
# computed later compares each row with the previous one, so a gap in the years
# would make that comparison span more than one year.
del_name = []

for name in co_name:
    # Rows of this ticker with incomplete records removed. dropna() returns a
    # new frame, so the boolean-mask slice of data_growth is never mutated in
    # place (calling dropna(inplace=True) on such a slice is the pattern that
    # raises SettingWithCopyWarning).
    dfs = data_growth[data_growth['거래소코드'] == name].dropna()

    sorted_years = dfs['year'].sort_values()

    # diff() is NaN for the first year; filling it with 1 lets the first row
    # count as "in sequence". Every other step must be exactly one year.
    is_continuous = (sorted_years.diff().fillna(1) == 1).all()

    if not is_continuous:
        print(f'연속성이 없음 : {name}')
        del_name.append(name)

연속성이 없음 : 008000
연속성이 없음 : 011330


In [7]:
# Tickers flagged as having non-consecutive years.
del_name

['008000', '011330']

In [8]:
# Ticker 008000
# The 2016 row is missing (the years jump from 2015 to 2017)

data_growth[data_growth['거래소코드']==del_name[0]]

Unnamed: 0,year,회사명,거래소코드,회계년도,시가총액(백만원),PER(최고),PER(최저),1주당순이익(원)
5559,2008,도레이케미칼(주),8000,2007/12,282693.68018,0.0,0.0,-1257.0
6232,2009,도레이케미칼(주),8000,2008/12,543258.43353,228.92,65.99,47.0
6947,2010,도레이케미칼(주),8000,2009/12,571726.12437,499.58,45.79,22.0
7685,2011,도레이케미칼(주),8000,2010/12,455483.05344,11.77,6.69,127.0
8464,2012,도레이케미칼(주),8000,2011/12,336137.92265,177.22,90.97,9.0
9156,2013,도레이케미칼(주),8000,2012/12,505365.9759,210.08,83.68,6.0
9882,2014,도레이케미칼(주),8000,2013/12,586502.71515,1620.35,98.19,75.0
10589,2015,도레이케미칼(주),8000,2014/12,892504.13175,0.0,0.0,-750.0
12071,2017,도레이케미칼(주),8000,2016/03,913367.8647,110.76,72.09,249.0


In [9]:
# Ticker 011330
# The 2015 row is missing (the years jump from 2014 to 2016)
data_growth[data_growth['거래소코드']==del_name[1]]

Unnamed: 0,year,회사명,거래소코드,회계년도,시가총액(백만원),PER(최고),PER(최저),1주당순이익(원)
5937,2008,(주)유니켐,11330,2007/12,24131.8413,0.0,0.0,-89.0
6690,2009,(주)유니켐,11330,2008/12,27838.923475,0.0,0.0,-35.0
7428,2010,(주)유니켐,11330,2009/12,26757.41134,0.0,0.0,-2246.0
8171,2011,(주)유니켐,11330,2010/12,22271.13878,9.15,2.22,109.0
8949,2012,(주)유니켐,11330,2011/12,13080.0,0.0,0.0,-214.0
9613,2013,(주)유니켐,11330,2012/12,29877.0,0.0,0.0,-315.0
10382,2014,(주)유니켐,11330,2013/12,23329.232256,0.0,0.0,-219.0
11765,2016,(주)유니켐,11330,2015/06,80803.710515,0.0,0.0,-559.0
12528,2017,(주)유니켐,11330,2016/12,67039.040585,70.91,40.13,36.0
13225,2018,(주)유니켐,11330,2017/12,89132.574245,41.35,26.82,47.0


In [10]:
# !pip install scipy

from scipy.stats import gmean

In [11]:
# Sanity check of the geometric mean
gmean([1, 2, 3]) # = (1 * 2 * 3) ** (1/3) -- the n-th root of the PRODUCT; it only matches sum([1, 2, 3]) ** (1/3) here because 1*2*3 == 1+2+3

1.8171205928321397

In [12]:
def make_indicator(회사명, 제외할회사명, data=None):
    """Compute EPS growth and PEG indicators, one frame per ticker.

    Parameters
    ----------
    회사명 : iterable of str
        Ticker codes (values of the '거래소코드' column) to process.
    제외할회사명 : iterable of str
        Ticker codes to skip.
    data : pandas.DataFrame, optional
        Source frame holding at least the columns 'year', '거래소코드',
        'PER(최고)' and '1주당순이익(원)'. When omitted, the notebook-level
        ``data_growth`` frame is used, which is what existing callers rely on.

    Returns
    -------
    list of pandas.DataFrame
        One frame per processed ticker, in the order given by ``회사명``, with
        three added columns:

        - ``1Y_EPS``: (EPS - previous row's EPS) / previous row's EPS
        - ``3Y_EPS_mean``: geometric mean of (1 + ``1Y_EPS``) over the current
          and the two preceding rows, minus 1
        - ``PEG``: ``PER(최고)`` / (``3Y_EPS_mean`` * 100)

        The first three rows of a ticker have NaN for ``3Y_EPS_mean`` and
        ``PEG`` because fewer than three growth rates are available.
    """
    source = data_growth if data is None else data
    # A set makes the per-ticker exclusion test O(1) instead of scanning a list.
    excluded = set(제외할회사명)

    df_list = []
    for code in 회사명:
        if code in excluded:
            continue

        # shift() below compares each row with the one before it, so the rows
        # must be in chronological order; sort explicitly instead of relying on
        # the order of the source file. copy() gives an independent frame so
        # the new columns are not written to a slice of the source.
        df = (
            source[source['거래소코드'] == code]
            .sort_values('year', kind='stable')
            .reset_index(drop=True)
            .dropna()
            .copy()
        )

        eps = df['1주당순이익(원)']
        prev_eps = eps.shift(1)
        df['1Y_EPS'] = (eps - prev_eps) / prev_eps

        # gmean reduces over the list axis, i.e. row by row across the three
        # growth factors. A NaN factor, or a factor below zero (EPS growth
        # under -100%), yields NaN for that row.
        growth_factor = df['1Y_EPS'] + 1
        df['3Y_EPS_mean'] = gmean(
            [growth_factor, growth_factor.shift(1), growth_factor.shift(2)]
        ) - 1

        # The yearly maximum PER is used as the PER in the PEG ratio; growth is
        # converted from a fraction to a percentage.
        df['PEG'] = df['PER(최고)'] / (df['3Y_EPS_mean'] * 100)

        df_list.append(df)
    return df_list

In [13]:
# Per-ticker frames with the PEG indicators
dfs_peg = make_indicator(co_name, del_name)

# All tickers combined into a single frame
df_peg = pd.concat(dfs_peg)

In [14]:
# Inspect the indicators of the first processed ticker.
dfs_peg[0]

Unnamed: 0,year,회사명,거래소코드,회계년도,시가총액(백만원),PER(최고),PER(최저),1주당순이익(원),1Y_EPS,3Y_EPS_mean,PEG
0,2008,삼성전자(주),5930,2007/12,66432000.0,15.74,11.46,49532.0,,,
1,2009,삼성전자(주),5930,2008/12,117692200.0,23.52,12.41,37684.0,-0.239199,,
2,2010,삼성전자(주),5930,2009/12,139787100.0,22.72,12.02,42137.0,0.118167,,
3,2011,삼성전자(주),5930,2010/12,155842700.0,12.25,9.42,88799.0,1.107388,0.214807,0.570278
4,2012,삼성전자(주),5930,2011/12,224189600.0,18.39,11.4,66868.0,-0.246974,0.210655,0.872991
5,2013,삼성전자(주),5930,2012/12,202094700.0,15.02,9.89,115576.0,0.72842,0.399807,0.375682
6,2014,삼성전자(주),5930,2013/12,195466200.0,15.03,11.47,118946.0,0.029158,0.102336,1.468694
7,2015,삼성전자(주),5930,2014/12,185597200.0,17.43,12.57,96784.0,-0.18632,0.131171,1.328796
8,2016,삼성전자(주),5930,2015/12,253504200.0,20.99,14.36,82682.0,-0.145706,-0.105636,-1.987017
9,2017,삼성전자(주),5930,2016/12,328943000.0,26.89,15.99,81602.0,-0.013062,-0.118037,-2.278097


In [15]:
# Drop the rows where 1Y_EPS, 3Y_EPS_mean or PEG is missing
# (dropna with default arguments removes a row if any column is NaN).
df_peg.dropna(inplace=True)
df_peg.head()

Unnamed: 0,year,회사명,거래소코드,회계년도,시가총액(백만원),PER(최고),PER(최저),1주당순이익(원),1Y_EPS,3Y_EPS_mean,PEG
3,2011,삼성전자(주),5930,2010/12,155842700.0,12.25,9.42,88799.0,1.107388,0.214807,0.570278
4,2012,삼성전자(주),5930,2011/12,224189600.0,18.39,11.4,66868.0,-0.246974,0.210655,0.872991
5,2013,삼성전자(주),5930,2012/12,202094700.0,15.02,9.89,115576.0,0.72842,0.399807,0.375682
6,2014,삼성전자(주),5930,2013/12,195466200.0,15.03,11.47,118946.0,0.029158,0.102336,1.468694
7,2015,삼성전자(주),5930,2014/12,185597200.0,17.43,12.57,96784.0,-0.18632,0.131171,1.328796


In [16]:
# Because rows with a missing 3Y_EPS_mean were dropped, PEG portfolios can only
# be built from the earliest remaining year onward.
df_peg['year'].unique().min()

2011

In [17]:
# Build the PEG screening condition: 0 < PEG <= 0.75
df_peg['조건'] = df_peg['PEG'].gt(0) & df_peg['PEG'].le(0.75)

# Number of qualifying stocks per year
df_peg.loc[df_peg['조건'], 'year'].value_counts()

2023    129
2011    109
2022    104
2018     87
2012     87
2017     74
2021     73
2019     65
2013     64
2014     61
2016     59
2020     56
2015     40
Name: year, dtype: int64

In [18]:
# Inspect the frame with the screening column added.
df_peg

Unnamed: 0,year,회사명,거래소코드,회계년도,시가총액(백만원),PER(최고),PER(최저),1주당순이익(원),1Y_EPS,3Y_EPS_mean,PEG,조건
3,2011,삼성전자(주),005930,2010/12,1.558427e+08,12.25,9.42,88799.0,1.107388,0.214807,0.570278,True
4,2012,삼성전자(주),005930,2011/12,2.241896e+08,18.39,11.40,66868.0,-0.246974,0.210655,0.872991,False
5,2013,삼성전자(주),005930,2012/12,2.020947e+08,15.02,9.89,115576.0,0.728420,0.399807,0.375682,True
6,2014,삼성전자(주),005930,2013/12,1.954662e+08,15.03,11.47,118946.0,0.029158,0.102336,1.468694,False
7,2015,삼성전자(주),005930,2014/12,1.855972e+08,17.43,12.57,96784.0,-0.186320,0.131171,1.328796,False
...,...,...,...,...,...,...,...,...,...,...,...,...
3,2023,효성티앤씨(주),298020,2022/12,1.579604e+06,9.11,3.86,67129.0,0.049711,4.284042,0.021265,True
3,2023,한일시멘트(주),300720,2022/12,8.858551e+05,28.35,10.84,946.0,-0.067980,-0.540835,-0.524190,False
3,2023,교촌에프앤비(주),339770,2022/12,1.856203e+05,169.64,83.26,106.0,-0.880765,-0.527680,-3.214825,False
3,2023,(주)세아제강,306200,2022/12,3.902749e+05,3.74,1.58,56889.0,0.742923,0.865131,0.043230,True


In [19]:
# Replace the row labels, which repeat across tickers after the concat, with a
# fresh 0..n-1 index; drop=True discards the old labels.
df_peg.reset_index(drop=True, inplace=True)
df_peg

Unnamed: 0,year,회사명,거래소코드,회계년도,시가총액(백만원),PER(최고),PER(최저),1주당순이익(원),1Y_EPS,3Y_EPS_mean,PEG,조건
0,2011,삼성전자(주),005930,2010/12,1.558427e+08,12.25,9.42,88799.0,1.107388,0.214807,0.570278,True
1,2012,삼성전자(주),005930,2011/12,2.241896e+08,18.39,11.40,66868.0,-0.246974,0.210655,0.872991,False
2,2013,삼성전자(주),005930,2012/12,2.020947e+08,15.02,9.89,115576.0,0.728420,0.399807,0.375682,True
3,2014,삼성전자(주),005930,2013/12,1.954662e+08,15.03,11.47,118946.0,0.029158,0.102336,1.468694,False
4,2015,삼성전자(주),005930,2014/12,1.855972e+08,17.43,12.57,96784.0,-0.186320,0.131171,1.328796,False
...,...,...,...,...,...,...,...,...,...,...,...,...
5004,2023,효성티앤씨(주),298020,2022/12,1.579604e+06,9.11,3.86,67129.0,0.049711,4.284042,0.021265,True
5005,2023,한일시멘트(주),300720,2022/12,8.858551e+05,28.35,10.84,946.0,-0.067980,-0.540835,-0.524190,False
5006,2023,교촌에프앤비(주),339770,2022/12,1.856203e+05,169.64,83.26,106.0,-0.880765,-0.527680,-3.214825,False
5007,2023,(주)세아제강,306200,2022/12,3.902749e+05,3.74,1.58,56889.0,0.742923,0.865131,0.043230,True


### 포트폴리오 구성
- [참고사이트](https://gentlesark.tistory.com/115)
- PEG 전략
    - PEG < 0.5
    - 과거 3년간 EPS 증가율 > 25%
    - ~~부채비율 < 100%~~
    - 최대 20종목

In [20]:
# Keep only the rows that meet the screening condition (0 < PEG <= 0.75).
# .copy() makes the result an independent frame, so the flag columns added in
# the following cells are written to it directly rather than to a slice of the
# unfiltered frame (which triggers SettingWithCopyWarning).
df_peg = df_peg[df_peg['조건']].copy()
df_peg.shape

(1008, 12)

In [21]:
# Condition 1: PEG < 0.5
df_peg['flag1'] = df_peg['PEG'].lt(0.5)
df_peg.loc[df_peg['flag1'], 'year'].value_counts()

2023    97
2011    86
2022    84
2012    66
2018    63
2021    55
2017    51
2019    51
2016    49
2014    49
2013    46
2020    41
2015    25
Name: year, dtype: int64

In [22]:
# Condition 2: 3-year average EPS growth > 25%
df_peg['flag2'] = df_peg['3Y_EPS_mean'].gt(0.25)
df_peg.loc[df_peg['flag2'], 'year'].value_counts()

2023    104
2022     89
2011     79
2018     72
2012     65
2017     60
2021     57
2019     54
2016     50
2014     46
2013     45
2020     39
2015     27
Name: year, dtype: int64

In [23]:
# Condition 3: condition 1 and condition 2 both hold
df_peg['flag3'] = df_peg[['flag1', 'flag2']].all(axis=1)

df_peg.loc[df_peg['flag3'], 'year'].value_counts()

2023    86
2022    78
2011    70
2018    58
2012    57
2021    48
2019    47
2017    46
2016    44
2014    42
2013    39
2020    34
2015    22
Name: year, dtype: int64

#### 연도별 포트폴리오 생성

- 최대 20종목
    - PEG 낮은 순서로

In [24]:
# Build one portfolio per year: among the stocks that satisfy the PEG strategy
# (flag3: PEG < 0.5 and 3-year average EPS growth > 25%), keep up to 20 with
# the lowest PEG and weight them by market capitalisation.
years = sorted(df_peg['year'].unique())

portfolios = []
for year in years:
    # Apply the strategy flag here; sorting by PEG alone does not guarantee
    # that the selected stocks meet the EPS-growth condition.
    df_peg_by_year = df_peg[(df_peg['year'] == year) & df_peg['flag3']]

    # Ascending PEG, then at most the 20 lowest.
    df_peg_by_year = df_peg_by_year.sort_values('PEG').head(20)

    # Market-cap weights within the year's portfolio. assign() returns a new
    # frame, so no column is written onto a slice of df_peg.
    market_cap = df_peg_by_year['시가총액(백만원)']
    df_peg_by_year = df_peg_by_year.assign(pf_ratio=market_cap / market_cap.sum())

    portfolios.append(df_peg_by_year)

df_portfolios = pd.concat(portfolios).reset_index(drop=True)
df_portfolios

Unnamed: 0,year,회사명,거래소코드,회계년도,시가총액(백만원),PER(최고),PER(최저),1주당순이익(원),1Y_EPS,3Y_EPS_mean,PEG,조건,flag1,flag2,flag3,pf_ratio
0,2011,성문전자(주),014910,2010/12,2.778839e+04,16.36,5.49,279.0,1.682692,4.186305,0.039080,True,True,True,True,0.000923
1,2011,(주)세원정공,021820,2010/06,1.180000e+05,2.58,0.83,42957.0,1.880700,0.616006,0.041883,True,True,True,True,0.003918
2,2011,기아(주),000270,2010/12,2.693445e+07,14.93,5.14,3625.0,-0.082046,3.529822,0.042297,True,True,True,True,0.894270
3,2011,(주)세아제강지주,003030,2010/12,3.972000e+05,3.30,1.92,17075.0,6.375810,0.654817,0.050396,True,True,True,True,0.013188
4,2011,일신방직(주),003200,2010/12,1.968000e+05,5.69,3.00,20397.0,0.386513,0.934380,0.060896,True,True,True,True,0.006534
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
255,2023,디에스알제강(주),069730,2022/12,6.019200e+04,3.66,1.95,2009.0,2.797732,0.624061,0.058648,True,True,True,True,0.003860
256,2023,대한제강(주),084010,2022/12,3.025140e+05,5.09,2.07,6773.0,0.523049,0.850987,0.059813,True,True,True,True,0.019401
257,2023,(주)노루홀딩스,000320,2022/12,1.523166e+05,6.98,4.03,3129.0,4.837687,1.103885,0.063231,True,True,True,True,0.009768
258,2023,해성디에스(주),195870,2022/12,9.418000e+05,7.94,3.73,9406.0,1.194587,1.164142,0.068205,True,True,True,True,0.060399


In [25]:
# Number of stocks included in the portfolio for each year

df_portfolios['year'].value_counts()

2011    20
2012    20
2013    20
2014    20
2015    20
2016    20
2017    20
2018    20
2019    20
2020    20
2021    20
2022    20
2023    20
Name: year, dtype: int64

In [26]:
# Inspect the portfolio constituents for one specific year
df_portfolios[df_portfolios['year']==2023]

Unnamed: 0,year,회사명,거래소코드,회계년도,시가총액(백만원),PER(최고),PER(최저),1주당순이익(원),1Y_EPS,3Y_EPS_mean,PEG,조건,flag1,flag2,flag3,pf_ratio
240,2023,효성티앤씨(주),298020,2022/12,1579604.0,9.11,3.86,67129.0,0.049711,4.284042,0.021265,True,True,True,True,0.101302
241,2023,(주)이원,17940,2022/12,427378.0,2.91,2.05,24076.0,0.701845,1.046739,0.027801,True,True,True,True,0.027408
242,2023,(주)엘엑스인터내셔널,1120,2022/12,1139544.0,3.64,1.64,14695.0,0.518706,1.128906,0.032244,True,True,True,True,0.07308
243,2023,(주)엔아이스틸,8260,2022/12,171600.7,3.66,1.93,1706.0,1.212711,0.96748,0.03783,True,True,True,True,0.011005
244,2023,티와이엠(주),2900,2022/12,248681.3,5.9,2.16,671.0,1.045732,1.518592,0.038852,True,True,True,True,0.015948
245,2023,문배철강(주),8420,2022/12,63970.94,4.84,2.39,1338.0,0.033205,1.227872,0.039418,True,True,True,True,0.004103
246,2023,(주)세아제강,306200,2022/12,390274.9,3.74,1.58,56889.0,0.742923,0.865131,0.04323,True,True,True,True,0.025029
247,2023,화천기공(주),850,2022/12,69630.0,2.67,1.86,15295.0,3.671655,0.614118,0.043477,True,True,True,True,0.004465
248,2023,(주)신화다이나믹스,1770,2022/12,25694.67,5.52,2.86,8905.0,0.867659,1.239341,0.04454,True,True,True,True,0.001648
249,2023,(주)풍산,103140,2022/12,1098552.0,5.75,3.72,6404.0,-0.262806,1.281852,0.044857,True,True,True,True,0.070451


In [27]:
# Sanity check: the weights within each year's portfolio should sum to 1.
df_portfolios.groupby('year')['pf_ratio'].sum()

year
2011    1.0
2012    1.0
2013    1.0
2014    1.0
2015    1.0
2016    1.0
2017    1.0
2018    1.0
2019    1.0
2020    1.0
2021    1.0
2022    1.0
2023    1.0
Name: pf_ratio, dtype: float64

In [28]:
# Save the portfolios to a CSV file (the row index is written as the first,
# unnamed column because to_csv is called with its default index setting).
df_portfolios.to_csv('../datasets/portfolio_peg.csv')