# 2차전지지수 DataSet(KRX K뉴딜지수)

In [6]:
# 필요패키지 import
import numpy as np
import pandas as pd
import time
from datetime import datetime
import csv # csv 파일 저장
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline 
# Load the raw index data; the file is decoded as cp949 so that the Korean
# column names and values are not garbled.
df = pd.read_csv(
    filepath_or_buffer='./data/2차전지지수_data_20150102.csv',
    encoding='cp949',
)

In [7]:
# Use a Hangul-capable font so Korean labels render in matplotlib figures.
import os
import platform
from matplotlib import font_manager, rc

# Render minus signs with the ASCII hyphen instead of the Unicode minus glyph.
plt.rcParams['axes.unicode_minus'] = False

# Every platform must bind f_path: without the final fallback, any OS other
# than macOS/Windows would hit a NameError when the font is loaded below.
if platform.system() == 'Darwin':
    f_path = '/Library/Fonts/Arial Unicode.ttf'
elif platform.system() == 'Windows':
    f_path = 'c:/Windows/Fonts/malgun.ttf'
else:
    # NOTE(review): usual NanumGothic location on Debian/Ubuntu (fonts-nanum
    # package) - adjust if the font lives elsewhere on this machine.
    f_path = '/usr/share/fonts/truetype/nanum/NanumGothic.ttf'

if os.path.isfile(f_path):
    # Resolve the family name from the font file and make it the default.
    font_name = font_manager.FontProperties(fname=f_path).get_name()
    rc('font', family=font_name)
    print('Hangul font is set!')
else:
    # Keep matplotlib's default font rather than aborting the notebook.
    print(f'Hangul font file not found: {f_path}; using the default font.')

Hangul font is set!


In [8]:
# Peek at the first five rows to spot data that is not needed.
df.head(n=5)

Unnamed: 0,일자,종가,대비,등락률,시가,고가,저가,거래량,거래대금,상장시가총액
0,2021/10/05,6280.61,-109.15,-1.71,6312.5,6379.8,6183.26,5224.0,1356660.0,182082695.0
1,2021/10/01,6389.76,-57.46,-0.89,6421.44,6455.34,6334.56,4634.0,1149764.0,186641979.0
2,2021/09/30,6447.22,10.44,0.16,6393.5,6464.66,6327.46,8484.0,1985004.0,188226567.0
3,2021/09/29,6436.78,16.92,0.26,6304.77,6444.19,6274.18,8342.0,2044538.0,187119598.0
4,2021/09/28,6419.86,4.25,0.07,6442.43,6457.56,6329.02,7517.0,1782063.0,186942442.0


In [9]:
# Reverse the file's newest-first order: sort rows by date, oldest first.
df.sort_values(by=['일자'], inplace=True, ascending=True)
df

Unnamed: 0,일자,종가,대비,등락률,시가,고가,저가,거래량,거래대금,상장시가총액
1661,2015/01/02,1000.00,,,,,,,,
1660,2015/01/05,985.34,-14.66,-1.47,,,,,,
1659,2015/01/06,949.62,-35.72,-3.63,,,,,,
1658,2015/01/07,946.38,-3.24,-0.34,,,,,,
1657,2015/01/08,966.28,19.90,2.10,,,,,,
...,...,...,...,...,...,...,...,...,...,...
4,2021/09/28,6419.86,4.25,0.07,6442.43,6457.56,6329.02,7517.0,1782063.0,186942442.0
3,2021/09/29,6436.78,16.92,0.26,6304.77,6444.19,6274.18,8342.0,2044538.0,187119598.0
2,2021/09/30,6447.22,10.44,0.16,6393.50,6464.66,6327.46,8484.0,1985004.0,188226567.0
1,2021/10/01,6389.76,-57.46,-0.89,6421.44,6455.34,6334.56,4634.0,1149764.0,186641979.0


In [10]:
# Renumber the rows from 0; drop=True discards the previous index instead of
# keeping it as a column.
df.reset_index(inplace=True, drop=True)
df

Unnamed: 0,일자,종가,대비,등락률,시가,고가,저가,거래량,거래대금,상장시가총액
0,2015/01/02,1000.00,,,,,,,,
1,2015/01/05,985.34,-14.66,-1.47,,,,,,
2,2015/01/06,949.62,-35.72,-3.63,,,,,,
3,2015/01/07,946.38,-3.24,-0.34,,,,,,
4,2015/01/08,966.28,19.90,2.10,,,,,,
...,...,...,...,...,...,...,...,...,...,...
1657,2021/09/28,6419.86,4.25,0.07,6442.43,6457.56,6329.02,7517.0,1782063.0,186942442.0
1658,2021/09/29,6436.78,16.92,0.26,6304.77,6444.19,6274.18,8342.0,2044538.0,187119598.0
1659,2021/09/30,6447.22,10.44,0.16,6393.50,6464.66,6327.46,8484.0,1985004.0,188226567.0
1660,2021/10/01,6389.76,-57.46,-0.89,6421.44,6455.34,6334.56,4634.0,1149764.0,186641979.0


In [11]:
# Add a '월별' (year-month) column derived from the trading date.

# Parse the '일자' strings into datetime64 so the .dt accessor is available.
df['일자'] = pd.to_datetime(df['일자'])

# Store the 'YYYY-MM' key right after the date column (position 1).
# DataFrame.insert raises ValueError when the column already exists, so on a
# re-run of this cell the existing column is overwritten in place instead.
if '월별' in df.columns:
    df['월별'] = df['일자'].dt.strftime('%Y-%m')
else:
    df.insert(1, '월별', df['일자'].dt.strftime('%Y-%m'))
df

Unnamed: 0,일자,월별,종가,대비,등락률,시가,고가,저가,거래량,거래대금,상장시가총액
0,2015-01-02,2015-01,1000.00,,,,,,,,
1,2015-01-05,2015-01,985.34,-14.66,-1.47,,,,,,
2,2015-01-06,2015-01,949.62,-35.72,-3.63,,,,,,
3,2015-01-07,2015-01,946.38,-3.24,-0.34,,,,,,
4,2015-01-08,2015-01,966.28,19.90,2.10,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...
1657,2021-09-28,2021-09,6419.86,4.25,0.07,6442.43,6457.56,6329.02,7517.0,1782063.0,186942442.0
1658,2021-09-29,2021-09,6436.78,16.92,0.26,6304.77,6444.19,6274.18,8342.0,2044538.0,187119598.0
1659,2021-09-30,2021-09,6447.22,10.44,0.16,6393.50,6464.66,6327.46,8484.0,1985004.0,188226567.0
1660,2021-10-01,2021-10,6389.76,-57.46,-0.89,6421.44,6455.34,6334.56,4634.0,1149764.0,186641979.0


In [12]:
# Summarize the frame: column dtypes, non-null counts and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1662 entries, 0 to 1661
Data columns (total 11 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   일자      1662 non-null   datetime64[ns]
 1   월별      1662 non-null   object        
 2   종가      1662 non-null   float64       
 3   대비      1661 non-null   float64       
 4   등락률     1661 non-null   float64       
 5   시가      265 non-null    float64       
 6   고가      265 non-null    float64       
 7   저가      265 non-null    float64       
 8   거래량     265 non-null    float64       
 9   거래대금    265 non-null    float64       
 10  상장시가총액  265 non-null    float64       
dtypes: datetime64[ns](1), float64(9), object(1)
memory usage: 143.0+ KB


In [13]:
# Add a '월별 평균종가' (monthly mean close) column at position 3, filled
# with 0 for now.
df.insert(
    loc=3,
    column='월별 평균종가',
    value=0,
    allow_duplicates=False,
)
df

Unnamed: 0,일자,월별,종가,월별 평균종가,대비,등락률,시가,고가,저가,거래량,거래대금,상장시가총액
0,2015-01-02,2015-01,1000.00,0,,,,,,,,
1,2015-01-05,2015-01,985.34,0,-14.66,-1.47,,,,,,
2,2015-01-06,2015-01,949.62,0,-35.72,-3.63,,,,,,
3,2015-01-07,2015-01,946.38,0,-3.24,-0.34,,,,,,
4,2015-01-08,2015-01,966.28,0,19.90,2.10,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
1657,2021-09-28,2021-09,6419.86,0,4.25,0.07,6442.43,6457.56,6329.02,7517.0,1782063.0,186942442.0
1658,2021-09-29,2021-09,6436.78,0,16.92,0.26,6304.77,6444.19,6274.18,8342.0,2044538.0,187119598.0
1659,2021-09-30,2021-09,6447.22,0,10.44,0.16,6393.50,6464.66,6327.46,8484.0,1985004.0,188226567.0
1660,2021-10-01,2021-10,6389.76,0,-57.46,-0.89,6421.44,6455.34,6334.56,4634.0,1149764.0,186641979.0


In [15]:
# Build a frame with one row per month holding the mean closing price,
# exposed as the '월별 평균종가' column next to the '월별' key.
df_mean = (
    df.groupby('월별')['종가']
    .mean()
    .rename('월별 평균종가')
    .reset_index()
)
df_mean

Unnamed: 0,월별,월별 평균종가
0,2015-01,1014.863333
1,2015-02,1142.924118
2,2015-03,1181.204091
3,2015-04,1266.055455
4,2015-05,1252.776667
...,...,...
77,2021-06,5794.975909
78,2021-07,6136.429091
79,2021-08,6158.736190
80,2021-09,6161.327368


In [16]:
# Attach each month's mean close to every daily row by joining on '월별'.
# A left join keeps df's row order and row count (an outer join re-sorts by
# key and would add rows for months missing from df). validate raises
# MergeError if df_mean ever contains a month more than once.
# Both frames carry a '월별 평균종가' column, so the result holds the default
# '월별 평균종가_x' (from df) and '월별 평균종가_y' (from df_mean) columns.
df_merge = df.merge(df_mean, how='left', on='월별', validate='many_to_one')
df_merge

Unnamed: 0,일자,월별,종가,월별 평균종가_x,대비,등락률,시가,고가,저가,거래량,거래대금,상장시가총액,월별 평균종가_y
0,2015-01-02,2015-01,1000.00,0,,,,,,,,,1014.863333
1,2015-01-05,2015-01,985.34,0,-14.66,-1.47,,,,,,,1014.863333
2,2015-01-06,2015-01,949.62,0,-35.72,-3.63,,,,,,,1014.863333
3,2015-01-07,2015-01,946.38,0,-3.24,-0.34,,,,,,,1014.863333
4,2015-01-08,2015-01,966.28,0,19.90,2.10,,,,,,,1014.863333
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1657,2021-09-28,2021-09,6419.86,0,4.25,0.07,6442.43,6457.56,6329.02,7517.0,1782063.0,186942442.0,6161.327368
1658,2021-09-29,2021-09,6436.78,0,16.92,0.26,6304.77,6444.19,6274.18,8342.0,2044538.0,187119598.0,6161.327368
1659,2021-09-30,2021-09,6447.22,0,10.44,0.16,6393.50,6464.66,6327.46,8484.0,1985004.0,188226567.0,6161.327368
1660,2021-10-01,2021-10,6389.76,0,-57.46,-0.89,6421.44,6455.34,6334.56,4634.0,1149764.0,186641979.0,6335.185000


In [17]:
# Remove the column that is no longer needed: the '_x' copy of
# '월별 평균종가' left over from the merge.
df_merge.drop(columns=['월별 평균종가_x'], inplace=True)
df_merge

Unnamed: 0,일자,월별,종가,대비,등락률,시가,고가,저가,거래량,거래대금,상장시가총액,월별 평균종가_y
0,2015-01-02,2015-01,1000.00,,,,,,,,,1014.863333
1,2015-01-05,2015-01,985.34,-14.66,-1.47,,,,,,,1014.863333
2,2015-01-06,2015-01,949.62,-35.72,-3.63,,,,,,,1014.863333
3,2015-01-07,2015-01,946.38,-3.24,-0.34,,,,,,,1014.863333
4,2015-01-08,2015-01,966.28,19.90,2.10,,,,,,,1014.863333
...,...,...,...,...,...,...,...,...,...,...,...,...
1657,2021-09-28,2021-09,6419.86,4.25,0.07,6442.43,6457.56,6329.02,7517.0,1782063.0,186942442.0,6161.327368
1658,2021-09-29,2021-09,6436.78,16.92,0.26,6304.77,6444.19,6274.18,8342.0,2044538.0,187119598.0,6161.327368
1659,2021-09-30,2021-09,6447.22,10.44,0.16,6393.50,6464.66,6327.46,8484.0,1985004.0,188226567.0,6161.327368
1660,2021-10-01,2021-10,6389.76,-57.46,-0.89,6421.44,6455.34,6334.56,4634.0,1149764.0,186641979.0,6335.185000


In [18]:
# Strip the merge suffix so the column is named '월별 평균종가'.
df_merge.rename(
    {'월별 평균종가_y': '월별 평균종가'},
    axis='columns',
    inplace=True,
)

In [19]:
# Write the finished data set to CSV without the index column; utf-8-sig is
# used as the output encoding. (A path ending in .gz would be written
# compressed.)
df_merge.to_csv(
    path_or_buf='./data/2차전지지수_DataSet_20150102.csv',
    index=False,
    encoding='utf-8-sig',
)