# Motion Chart 01 - Data Preparation

Motion Bubble Chart Data Preparation

<img src="http://wiki.stat.ucla.edu/socr/uploads/b/ba/SOCR_Activities_MotionCharts_HPI_070109_Fig6_9_Animation.gif" style="float:left">


#### 이승준 http://fb.com/plusjune

# 1995~2015 시가총액
https://gist.githubusercontent.com/plusjune/d5c30a3377d43cd53884
    

In [2]:
%matplotlib inline

import pandas as pd
import matplotlib.pylab as plt
from pandas_datareader import data, wb

gist_url_tmpl = "https://gist.githubusercontent.com/plusjune/d5c30a3377d43cd53884/" \
                "raw/e634b4af7616abebb6a1fb41e6c045396de1df20/marcap-%s.csv"

## 2015년 상위 200 종목의 코드와 종목명

In [3]:
this_year = '2015'
df_base = pd.read_csv( gist_url_tmpl % (this_year), thousands=",")
df_base = df_base.head(200)
df_base.head()

Unnamed: 0,순위,종목코드,종목명,현재가,대비,등락률,거래량,거래대금,시가총액(백만원),시가총액비중(%),상장주식수(천주),외국인 보유주식수,외국인 지분율(%)
0,1,5930,삼성전자,1295000,"상승 3,000",0.23,207148,268005,190752641,13.04,147299337,72858065,49.46
1,2,5380,현대차,151500,"하락 1,500",-0.98,697842,106227,33371887,2.28,220276479,96759116,43.93
2,3,15760,한국전력,50300,상승 350,0.7,1079479,54193,32290793,2.21,641964077,204358747,31.83
3,4,28260,삼성물산,147500,"하락 1,000",-0.67,278739,41335,27979281,1.91,189690043,18689887,9.85
4,5,5935,삼성전자우,1090000,"상승 5,000",0.46,44463,48372,24888435,1.7,22833427,17276502,75.66


In [3]:
df_base = df_base[['종목코드', '종목명']]
df_base.head()

Unnamed: 0,종목코드,종목명
0,5930,삼성전자
1,5380,현대차
2,15760,한국전력
3,28260,삼성물산
4,5935,삼성전자우


## 업종 조회하여 채워넣기

In [4]:
import pprint, pickle
import requests
from bs4 import BeautifulSoup


def get_sector(code):
    url = 'http://companyinfo.stock.naver.com/v1/company/c1010001.aspx?cmp_cd=' + code
    r = requests.get(url)
    soup = BeautifulSoup(r.text, "lxml")
    sector = ""
    td = soup.find('td', {'class':'cmp-table-cell td0101'})
    dts = td.findAll('dt')
    sector = dts[2].text.split(':')[1].strip()
    return sector


In [5]:
df_sector = df_base.copy()
df_sector['업종'] = ''

In [6]:
for x, row in df_sector.iterrows():
    row['업종'] = get_sector(row['종목코드'])

In [7]:
df_sector.head(20)

Unnamed: 0,종목코드,종목명,업종
0,5930,삼성전자,전기전자
1,5380,현대차,운수장비
2,15760,한국전력,전기가스
3,28260,삼성물산,유통업
4,5935,삼성전자우,전기전자
5,12330,현대모비스,운수장비
6,90430,아모레퍼시픽,화학
7,660,SK하이닉스,전기전자
8,51910,LG화학,화학
9,32830,삼성생명,보험업


In [8]:
df_sector.columns = ['ID', 'Entity', 'Group']
df_sector = df_sector.set_index('Entity')
df_sector[['Group', 'ID']].to_csv('marcap_sectors.csv')

In [9]:
!head -10 marcap_sectors.csv

Entity,Group,ID
삼성전자,전기전자,005930
현대차,운수장비,005380
한국전력,전기가스,015760
삼성물산,유통업,028260
삼성전자우,전기전자,005935
현대모비스,운수장비,012330
아모레퍼시픽,화학,090430
SK하이닉스,전기전자,000660
LG화학,화학,051910


## 종목코드를 index로 지정

In [10]:
df_base = df_base[['종목코드', '종목명']]
df_base = df_base.set_index('종목코드')
df_base.head(10)

Unnamed: 0_level_0,종목명
종목코드,Unnamed: 1_level_1
5930,삼성전자
5380,현대차
15760,한국전력
28260,삼성물산
5935,삼성전자우
12330,현대모비스
90430,아모레퍼시픽
660,SK하이닉스
51910,LG화학
32830,삼성생명


In [12]:
df_base.head(20)

Unnamed: 0_level_0,종목명
종목코드,Unnamed: 1_level_1
5930,삼성전자
5380,현대차
15760,한국전력
28260,삼성물산
5935,삼성전자우
12330,현대모비스
90430,아모레퍼시픽
660,SK하이닉스
51910,LG화학
32830,삼성생명


In [22]:
def merge_column(df_base, year_start, year_end, merge_column = '현재가'):
    for y in range(year_start, year_end+1):
        dft = pd.read_csv( gist_url_tmpl % (str(y)), thousands=",")
        dft = dft[['종목코드', merge_column]]
        dft = dft.set_index('종목코드')
        dft.columns = [str(y)]
        df_base = df_base.join(dft)
    return df_base

In [23]:
df = merge_column(df_base, 2000, 2015, '현재가')
df = df.set_index('종목명')
df.head(20)

Unnamed: 0_level_0,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015
종목명,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
삼성전자,158000.0,279000.0,314000.0,451000.0,450500.0,659000.0,613000.0,556000.0,451000.0,799000.0,949000.0,1061000.0,1522000.0,1372000.0,1327000,1295000
현대차,12100.0,26900.0,27750.0,50500.0,55500.0,97300.0,67400.0,71600.0,39500.0,121000.0,173500.0,213000.0,218500.0,236500.0,169000,151500
한국전력,23600.0,21700.0,18250.0,21400.0,26850.0,37800.0,42400.0,39650.0,29600.0,34100.0,30200.0,26000.0,30450.0,34750.0,42700,50300
삼성물산,,,,,,,,,,,,,,,158000,147500
삼성전자우,69000.0,113500.0,150000.0,245500.0,298500.0,491000.0,480000.0,428000.0,258000.0,525000.0,649000.0,656000.0,852000.0,1013000.0,1039000,1090000
현대모비스,4985.0,18900.0,21800.0,64100.0,65500.0,92400.0,85900.0,87200.0,63100.0,171000.0,284500.0,290500.0,288000.0,293500.0,236000,250500
아모레퍼시픽,,,,,,,580000.0,710000.0,654000.0,934000.0,1139000.0,1081000.0,1214000.0,1000000.0,2220000,414500
SK하이닉스,4025.0,2420.0,280.0,5600.0,11650.0,35300.0,36450.0,25950.0,6700.0,23150.0,24000.0,21150.0,25750.0,36800.0,47750,32700
LG화학,,21750.0,40600.0,55000.0,41250.0,57000.0,43100.0,89600.0,71000.0,228500.0,391000.0,320000.0,330000.0,299500.0,181000,339500
삼성생명,,,,,,,,,,,102500.0,81700.0,94300.0,104000.0,116500,109500


In [24]:
df.to_csv('marcap_price.csv')

In [25]:
!head -10 marcap_price.csv

종목명,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015
삼성전자,158000.0,279000.0,314000.0,451000.0,450500.0,659000.0,613000.0,556000.0,451000.0,799000.0,949000.0,1061000.0,1522000.0,1372000.0,1327000.0,1295000
현대차,12100.0,26900.0,27750.0,50500.0,55500.0,97300.0,67400.0,71600.0,39500.0,121000.0,173500.0,213000.0,218500.0,236500.0,169000.0,151500
한국전력,23600.0,21700.0,18250.0,21400.0,26850.0,37800.0,42400.0,39650.0,29600.0,34100.0,30200.0,26000.0,30450.0,34750.0,42700.0,50300
삼성물산,,,,,,,,,,,,,,,158000.0,147500
삼성전자우,69000.0,113500.0,150000.0,245500.0,298500.0,491000.0,480000.0,428000.0,258000.0,525000.0,649000.0,656000.0,852000.0,1013000.0,1039000.0,1090000
현대모비스,4985.0,18900.0,21800.0,64100.0,65500.0,92400.0,85900.0,87200.0,63100.0,171000.0,284500.0,290500.0,288000.0,293500.0,236000.0,250500
아모레퍼시픽,,,,,,,580000.0,710000.0,654000.0,934000.0,1139000.0,1081000.0,1214000.0,1000000.0,2220000.0,414500
SK하이닉스,4025.0,2420.0,280.0,5600.0,11650.0,35300.0,36450.0,2

In [26]:
len(df)

200

In [27]:
df = merge_column(df_base, 2000, 2015, '시가총액(백만원)')
df = df.set_index('종목명')
df.to_csv('marcap_cap.csv')
df.head(10)

Unnamed: 0_level_0,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015
종목명,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
삼성전자,23895911.0,42220610.0,47958513.0,68034797.0,66358351.0,97070263.0,90294494.0,81898431.0,66432001.0,117692170.0,139787071.0,156284597.0,224189591.0,202094690.0,195466220,190752641
현대차,2771973.0,5893486.0,6079711.0,11085684.0,12133871.0,21314412.0,14792026.0,15742965.0,8700921.0,26653454.0,38217969.0,46918890.0,48130411.0,52095387.0,37226725,33371887
한국전력,15106381.0,13890189.0,11681841.0,13698159.0,17204099.0,24220296.0,27202471.0,25438160.0,18990404.0,21877459.0,19375345.0,16691066.0,19547806.0,22308252.0,27411866,32290793
삼성물산,,,,,,,,,,,,,,,21330000,27979281
삼성전자우,1648646.0,2711904.0,3584014.0,5750451.0,6815778.0,11211213.0,10960045.0,9772707.0,5891024.0,11987549.0,14818894.0,14978728.0,19454080.0,23130262.0,23723931,24888435
현대모비스,393399.0,1492702.0,1856897.0,5436833.0,5601509.0,7918922.0,7371248.0,7628121.0,5526993.0,16645801.0,27694329.0,28278392.0,28035033.0,28570424.0,22973152,24384638
아모레퍼시픽,,,,,,,3390592.0,4150553.0,3823185.0,5460023.0,6658422.0,6319363.0,7096861.0,5845849.0,12977785,24231044
SK하이닉스,1973986.0,2447240.0,1467192.0,2484839.0,5182685.0,15806375.0,16731351.0,11916983.0,3079339.0,13650133.0,14166637.0,12524429.0,17874434.0,26135393.0,34762113,23805677
LG화학,,1401245.0,2615658.0,3543379.0,2657534.0,3672229.0,2776720.0,6741394.0,5341953.0,15142946.0,25912000.0,21206752.0,21869463.0,19848194.0,11995069,22499038
삼성생명,,,,,,,,,,,20500000.0,16340000.0,18860000.0,20800000.0,23300000,21900000


In [28]:
df.min()
df.max()

2000     23895911
2001     42220610
2002     47958513
2003     68034797
2004     66358351
2005     97070263
2006     90294494
2007     81898431
2008     66432001
2009    117692170
2010    139787071
2011    156284597
2012    224189591
2013    202094690
2014    195466220
2015    190752641
dtype: float64

In [29]:
df = merge_column(df_base, 2000, 2015, '외국인 지분율(%)')
df = df.set_index('종목명')
df.to_csv('marcap_foreign_ratio.csv')

In [30]:
df.head()

Unnamed: 0_level_0,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015
종목명,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
삼성전자,54.16,59.63,53.9,57.3,54.13,53.83,49.08,46.9,43.07,47.71,50.58,50.4,50.4,49.67,51.81,49.46
현대차,40.98,52.76,47.19,51.25,55.83,45.1,40.79,33.16,26.57,36.4,42.75,42.24,45.85,45.16,43.59,43.93
한국전력,26.14,26.58,24.95,28.93,30.51,29.95,29.28,27.94,24.29,25.46,23.66,23.97,25.77,23.57,29.35,31.83
삼성물산,,,,,,,,,,,,,,,2.08,9.85
삼성전자우,53.19,64.47,60.67,77.72,80.58,81.87,80.89,80.68,79.72,81.42,82.86,82.22,82.46,82.93,80.18,75.66


In [None]:
#  End of Document