## Pandas
* 행에 열 레이블을 부착한 n차원 행렬 자료구조를 제공하는 파이썬 라이브러리
* 지원하는 자료구조는 Series, DataFrame, Panel임
* 단, 0.20이후로 Panel은 deprecated 됨
* numpy 기반으로 구현되어 처리속도가 빠름

* pandas의 창시자 중 한 명은 헤지펀드 애널리스트로 일하며  
  파이썬에서 금율 시계열을 다루기 위한 목적으로 개발함
* pandas.pydata.org
* pip install pandas

In [85]:
import numpy as np
import pandas as pd

## pandas 자료구조 1 : series
* R의 벡터와 유사한 자료구조 : 1차원 배열
* pd.Series(데이터, 인덱스, 자료형)

In [86]:
pd.__version__

'1.3.3'

In [87]:
# 빈시리즈 생성

a = pd.Series()
a

  This is separate from the ipykernel package so we can avoid doing imports until


Series([], dtype: float64)

In [88]:
# numpy배열을 시리즈로 생성

b = pd.Series([1,2,3,4,5])
b   # (인덱스는 0부터)

0    1
1    2
2    3
3    4
4    5
dtype: int64

In [89]:
# 시리즈 생성시 인덱스 지정(index 속성)

c = pd.Series([6,7,8,9,10], index=[1,2,3,4,5])
c

1     6
2     7
3     8
4     9
5    10
dtype: int64

In [90]:
# 시리즈 객체가 지원하는 여러가지 속성
print(c.values)   # 시리즈의 요소값
print(c.index)    # 시리즈의 인덱스
print(c[2])       # 2번째값 (인덱스가 1부터 시작)
print(c[3:5])     # 3,4번째 값

[ 6  7  8  9 10]
Int64Index([1, 2, 3, 4, 5], dtype='int64')
7
4     9
5    10
dtype: int64


## 팬더스 indexer
* pandas에서 정수형 인덱스를 사용하는경우 파이썬의 slice 연산과 혼동할 위험 존재
* 따라서, pandas만의 특별한 요소지정방법 제공 - indexer
  + loc
  + iloc

In [91]:
d = pd.Series([9,8,7,6,5], index=['가','나','다','라','마'])

In [92]:
# 1번째 자료 지정
print(d[0])
print(d.iloc[0])
print(d.loc['가'])

9
9
9


In [93]:
# 2번째이후 모든 자료 지정
print(d[1:])
print(d.iloc[1:])     # d.iloc[1:5]
print(d.loc['나':])   # d.loc['나':'마']

나    8
다    7
라    6
마    5
dtype: int64
나    8
다    7
라    6
마    5
dtype: int64
나    8
다    7
라    6
마    5
dtype: int64


In [94]:
# 홀수위치의 모든자료 지정
print(d[::2])
print(d.iloc[::2])
print(d.loc[['가','다','마']])    # print(d.loc[::2])

가    9
다    7
마    5
dtype: int64
가    9
다    7
마    5
dtype: int64
가    9
다    7
마    5
dtype: int64


In [95]:
# dict 객체로 시리즈 객체 생성 (!!)
data = {'a':1, 'b':2, 'c':3, 'd':4, 'e':5}
e = pd.Series(data)
e

a    1
b    2
c    3
d    4
e    5
dtype: int64

## pandas 자료구조 2 : dataframe
* R의 데이터프레임과 유사한 자료구조 : 2차원 테이블
* pd.DataFrame(데이터, 인덱스, 컬럼레이블, 자료형)

In [96]:
# 빈 데이터프레임 객체 생성

f = pd.DataFrame()
f

In [97]:
# 리스트로 데이터프레임 객체 생성
data = [1,2,3,4,5]
g = pd.DataFrame(data)
g  # 컬럼명이 없음

Unnamed: 0,0
0,1
1,2
2,3
3,4
4,5


In [98]:
g = pd.DataFrame(data, columns=['nums'])
g  # 컬럼명 출력

Unnamed: 0,nums
0,1
1,2
2,3
3,4
4,5


In [99]:
# 3행 2열짜리 배열로 dataframe 객체 생성
data = [['지현',99], ['혜교',76], ['수지',83]]
cols = ['이름', '점수']
idx = [1,2,3]
h = pd.DataFrame(data, columns=cols, index=idx)
h

Unnamed: 0,이름,점수
1,지현,99
2,혜교,76
3,수지,83


In [100]:
# dict로 dataFrame 객체 생성
data2 = { '이름':['지현','혜교','수지'],
         '점수':[99, 76, 83] }
i = pd.DataFrame(data2, index=idx)
i

Unnamed: 0,이름,점수
1,지현,99
2,혜교,76
3,수지,83


In [101]:
# serise로 dataframe 객체 생성

name = pd.Series(['지현','혜교','수지'])
jumsu = pd.Series([99, 76, 83])
data3 = {'이름':name, '점수':jumsu}
j = pd.DataFrame(data3)
j

Unnamed: 0,이름,점수
0,지현,99
1,혜교,76
2,수지,83


In [106]:
# 인덱스를 재설정하려면 reindex를 사용
# 기존에 존재하지 않는 인덱스가 추가되면 NaN 값이 입력됨

j.reindex(idx)
j

Unnamed: 0_level_0,점수
이름,Unnamed: 1_level_1
지현,99
혜교,76
수지,83


In [109]:
# 특정 컬럼을 인덱스를 재설정하려면 set_index를 사용

#j = j.set_index('이름')
j

Unnamed: 0_level_0,점수
이름,Unnamed: 1_level_1
지현,99
혜교,76
수지,83


In [111]:
# 인덱스를 설정하려면 index 속성을 사용

j.index = idx
j

Unnamed: 0,점수
1,99
2,76
3,83


In [151]:
# ex) ledership 데이터를 
# pandas의 dataframe으로 생성



date = [[1, '10/24/14', 'US', 'M', 32, 5,4,5,5,5],[2, '10/28/14', 'US', 'F', 45, 3,5,2,5,5],
       [3, '10/01/14', 'UK', 'F', 25, 3,5,5,5,2], [4, '10/12/14', 'UK', 'M', 39, 3,3,4,np.NaN,np.NaN],
       [5, '10/24/14', 'UK', 'F', 99, 2,2,1,2,1]]
cols1 = ['Manager', 'Date','Country','Gender','Age','q1','q2','q3','q4','q5']

k = pd.DataFrame(date, columns=cols1)
k

Unnamed: 0,Manager,Date,Country,Gender,Age,q1,q2,q3,q4,q5
0,1,10/24/14,US,M,32,5,4,5,5.0,5.0
1,2,10/28/14,US,F,45,3,5,2,5.0,5.0
2,3,10/01/14,UK,F,25,3,5,5,5.0,2.0
3,4,10/12/14,UK,M,39,3,3,4,,
4,5,10/24/14,UK,F,99,2,2,1,2.0,1.0


In [153]:
## 답안
manager = [1,2,3,4,5]
date = ['10/24/14','10/28/14','10/01/14','10/12/14','05/01/14']
country = ['US','US','UK','UK','UK']
gender = ['M','F','F','M','F']
age = [32, 45, 25, 39, 99]
q1 = [5,3,3,3,2]
q2 = [4,5,5,3,2]
q3 = [5,2,5,4,1]
q4 = [5,5,5,np.NaN,2]
q5 = [5,5,2,np.NaN,1]

data = { 'Manager':manager, 'Date':date, 'Country':country, 'Gender':gender, 
        'Age':age, 'q1':q1, 'q2':q2, 'q3':q3, 'q4':q4, 'q5':q5 }

managership = pd.DataFrame(data)
managership.index=[1,2,3,4,5]
managership

Unnamed: 0,Manager,Date,Country,Gender,Age,q1,q2,q3,q4,q5
1,1,10/24/14,US,M,32,5,4,5,5.0,5.0
2,2,10/28/14,US,F,45,3,5,2,5.0,5.0
3,3,10/01/14,UK,F,25,3,5,5,5.0,2.0
4,4,10/12/14,UK,M,39,3,3,4,,
5,5,05/01/14,UK,F,99,2,2,1,2.0,1.0


In [159]:
# 데이터프레임의 각 요소에 접근하기
# 나이컬럼 출력
print(managership['Age'])   # 객체명[컬럼명]
print(managership.Age)      # 객체명.컬럼명
print(managership.iloc[:, 4])     # 객체명.iloc[행,열]
print(managership.loc[:, 'Age'])  # 객체명.loc[행이름,컬럼명]

1    32
2    45
3    25
4    39
5    99
Name: Age, dtype: int64
1    32
2    45
3    25
4    39
5    99
Name: Age, dtype: int64
1    32
2    45
3    25
4    39
5    99
Name: Age, dtype: int64
1    32
2    45
3    25
4    39
5    99
Name: Age, dtype: int64


In [164]:
# 질문컬럼(q1~q5) 출력

print(managership[['q1','q2','q3','q4','q5']])   # 객체명[[컬럼명1, 컬럼명2 ...]]

print(managership.iloc[:, 5:10])   # 객체명.iloc[행, 시작:끝+1]

print(managership.loc[:, 'q1':'q5'])   # 객체명.loc[행, 시작컬럼명:끝컬럼명]

   q1  q2  q3   q4   q5
1   5   4   5  5.0  5.0
2   3   5   2  5.0  5.0
3   3   5   5  5.0  2.0
4   3   3   4  NaN  NaN
5   2   2   1  2.0  1.0
   q1  q2  q3   q4   q5
1   5   4   5  5.0  5.0
2   3   5   2  5.0  5.0
3   3   5   5  5.0  2.0
4   3   3   4  NaN  NaN
5   2   2   1  2.0  1.0
   q1  q2  q3   q4   q5
1   5   4   5  5.0  5.0
2   3   5   2  5.0  5.0
3   3   5   5  5.0  2.0
4   3   3   4  NaN  NaN
5   2   2   1  2.0  1.0


## 외부파일로 데이터프레임 만들기
* 외부 데이터파일을 이용해서 dataframe 객체를 만들수있음
* csv, excel, json, xml ... 등등 지원함
* pd.read_xxx(경로, 구분자, 헤더설정, 인코딩)

In [170]:
aw = pd.read_csv('data/applewood.txt', header=0, sep = ' ')
aw.head()

Unnamed: 0,Age,Profit,Location,Vehicle-Type,Previous
0,21,"$1,387",Tionesta,Sedan,0
1,23,1754,Sheffield,SUV,1
2,24,1817,Sheffield,Hybrid,1
3,25,1040,Sheffield,Compact,0
4,26,1273,Kane,Sedan,1


In [171]:
# 데이터프레임의 구조 알아보기
# 컬럼별 자료형, 결측치 갯수, 데이터 총 갯수
aw.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 180 entries, 0 to 179
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Age           180 non-null    int64 
 1   Profit        180 non-null    object
 2   Location      180 non-null    object
 3   Vehicle-Type  180 non-null    object
 4   Previous      180 non-null    int64 
dtypes: int64(2), object(3)
memory usage: 7.2+ KB


In [177]:
# 일련번호 만들기 : numpy arange (시작번호, 종료번호, 간격)
idx = np.arange(1, 180+1)
aw.index = idx
aw.head()

Unnamed: 0,Age,Profit,Location,Vehicle-Type,Previous
1,21,"$1,387",Tionesta,Sedan,0
2,23,1754,Sheffield,SUV,1
3,24,1817,Sheffield,Hybrid,1
4,25,1040,Sheffield,Compact,0
5,26,1273,Kane,Sedan,1


In [179]:
# json 파일 읽고 데이터프레임으로 만들기

seoul_geo = pd.read_json('data/seoul_geo_simple.json')

seoul_geo.info()

seoul_geo.head()   # 컬럼 뒷부분이 보이지 않음

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25 entries, 0 to 24
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   type      25 non-null     object
 1   features  25 non-null     object
dtypes: object(2)
memory usage: 528.0+ bytes


Unnamed: 0,type,features
0,FeatureCollection,"{'type': 'Feature', 'properties': {'code': '11..."
1,FeatureCollection,"{'type': 'Feature', 'properties': {'code': '11..."
2,FeatureCollection,"{'type': 'Feature', 'properties': {'code': '11..."
3,FeatureCollection,"{'type': 'Feature', 'properties': {'code': '11..."
4,FeatureCollection,"{'type': 'Feature', 'properties': {'code': '11..."


In [182]:
# pd.set_option함수를 이용해서 출력양식을 변경
pd.set_option('display.max_columns', 50)
pd.set_option('display.width', 100)
pd.set_option('display.max_colwidth', -1)
seoul_geo.head()

  after removing the cwd from sys.path.


Unnamed: 0,type,features
0,FeatureCollection,"{'type': 'Feature', 'properties': {'code': '11250', 'name': '강동구', 'name_eng': 'Gangdong-gu', 'base_year': '2013'}, 'geometry': {'type': 'Polygon', 'coordinates': [[[127.11519584981606, 37.557533180704915], [127.11879551821994, 37.557222485451305], [127.12146867175024, 37.55986003393365], [127.12435254630417, 37.56144246249796], [127.13593925898998, 37.56564793048277], [127.14930548011061, 37.56892250303897], [127.15511020940411, 37.57093642128295], [127.16683184366129, 37.57672487388627], [127.17038810813094, 37.576465605301046], [127.17607118428914, 37.57678573961056], [127.17905504160184, 37.57791388161732], [127.17747787800164, 37.57448983055031], [127.1781775408844, 37.571481967974336], [127.17995281860672, 37.569309661290504], [127.18122821955262, 37.56636089217979], [127.18169407550688, 37.56286338914073], [127.18408792330152, 37.55814280369575], [127.18350810324185, 37.550053002101485], [127.1852644795464, 37.54888592026534], [127.18480906237207, 37.545296888806796], [127.18543378919821, 37.54260756512178], [127.18364810569703, 37.54241347907019], [127.18116465939269, 37.54384126582126], [127.17770860504257, 37.542414255164374], [127.1744373170213, 37.54277723796397], [127.16830424484573, 37.54145405702079], [127.16530984307447, 37.54221851258693], [127.15566835118616, 37.53119520531309], [127.15538075046105, 37.52652930087977], [127.15154315998161, 37.522828709496416], [127.14981542759394, 37.51926843453025], [127.14791518058246, 37.51918714979303], [127.14684644251928, 37.51661384818575], [127.14672806823502, 37.51415680680291], [127.14532023498624, 37.51464060108829], [127.12123165719615, 37.52528270089], [127.12251496040881, 37.52751810228347], [127.12532464331997, 37.53572787912298], [127.12061313033807, 37.538129867839416], [127.1116764203608, 37.540669955324965], [127.11418412219375, 37.54474592090681], [127.11600200349189, 37.55053147511706], [127.11600943681239, 37.55580061507081], [127.11519584981606, 37.557533180704915]]]}}"
1,FeatureCollection,"{'type': 'Feature', 'properties': {'code': '11240', 'name': '송파구', 'name_eng': 'Songpa-gu', 'base_year': '2013'}, 'geometry': {'type': 'Polygon', 'coordinates': [[[127.0690698130372, 37.522279423505026], [127.07496309841329, 37.52091052765938], [127.07968915919895, 37.52077294752823], [127.08639455667742, 37.52161824624356], [127.0943611414465, 37.523984206117525], [127.10087519791962, 37.524841220167055], [127.10484130265957, 37.53120327509912], [127.1116764203608, 37.540669955324965], [127.12061313033807, 37.538129867839416], [127.12532464331997, 37.53572787912298], [127.12251496040881, 37.52751810228347], [127.12123165719615, 37.52528270089], [127.14532023498624, 37.51464060108829], [127.14672806823502, 37.51415680680291], [127.14670263739373, 37.512786602955565], [127.14462782318448, 37.511529542030715], [127.14323992504048, 37.50951977457089], [127.1420864475393, 37.50578973782813], [127.14324986168657, 37.502649431479774], [127.1473517108062, 37.50069754405746], [127.14980119646964, 37.50046502392898], [127.15223804785649, 37.50170492532197], [127.15401160147654, 37.500347919909956], [127.16086308579277, 37.49886565522751], [127.1634944215765, 37.497445406097484], [127.16199885180917, 37.49402577547199], [127.16216448592424, 37.491639601211624], [127.16040295326431, 37.4877818619403], [127.15892216655034, 37.486126922469445], [127.15393282790794, 37.48483891408459], [127.15147990997852, 37.47745324805034], [127.1515017465549, 37.475633269417585], [127.14857580353349, 37.47381386382568], [127.14415938171436, 37.473692508393505], [127.14112111404233, 37.470600239054825], [127.13631568648837, 37.47214721764681], [127.13281577200672, 37.47257463763244], [127.13307493070646, 37.468907694139894], [127.13750907701846, 37.46647058226059], [127.13478085797742, 37.46509524639883], [127.1308437061496, 37.46509985661207], [127.12728991002369, 37.46673043118672], [127.12729757787379, 37.46421548908766], [127.12440571080893, 37.46240445587048], [127.12441393026374, 37.46442715236855], [127.12265007208167, 37.46756987490939], [127.11380709617507, 37.479633334849325], [127.1143875173445, 37.48073157362458], [127.11117085201238, 37.485708381512445], [127.1077937689776, 37.48860875954992], [127.10433125798602, 37.490728250649646], [127.0988509639092, 37.49302529254068], [127.08050206733888, 37.49783151325589], [127.0764808967127, 37.498612695580306], [127.0719146000724, 37.50224013587669], [127.06926628842805, 37.51717796437217], [127.06860425556381, 37.51812758676938], [127.0690698130372, 37.522279423505026]]]}}"
2,FeatureCollection,"{'type': 'Feature', 'properties': {'code': '11230', 'name': '강남구', 'name_eng': 'Gangnam-gu', 'base_year': '2013'}, 'geometry': {'type': 'Polygon', 'coordinates': [[[127.05867359288398, 37.52629974922568], [127.0690698130372, 37.522279423505026], [127.06860425556381, 37.51812758676938], [127.06926628842805, 37.51717796437217], [127.0719146000724, 37.50224013587669], [127.0764808967127, 37.498612695580306], [127.08050206733888, 37.49783151325589], [127.0988509639092, 37.49302529254068], [127.10433125798602, 37.490728250649646], [127.1077937689776, 37.48860875954992], [127.11117085201238, 37.485708381512445], [127.1143875173445, 37.48073157362458], [127.11380709617507, 37.479633334849325], [127.12265007208167, 37.46756987490939], [127.12441393026374, 37.46442715236855], [127.12440571080893, 37.46240445587048], [127.11957248720776, 37.45936217377656], [127.11885903757606, 37.45578434878651], [127.11535741803938, 37.45722556454321], [127.11413179478714, 37.45875072431525], [127.10841788934951, 37.45972888008147], [127.10561257180657, 37.456815702518746], [127.10032466845217, 37.45598440195682], [127.09842759318751, 37.45862253857461], [127.09712653145507, 37.460848194480654], [127.09039613625872, 37.465520545397716], [127.0866005634691, 37.47006403057779], [127.08640440578156, 37.472697935184655], [127.0802737559454, 37.471973057552624], [127.07602132306535, 37.47005021331707], [127.07476117209941, 37.47199174520626], [127.07231320371885, 37.47234914588019], [127.07135137525977, 37.47107802023145], [127.06463901956462, 37.47003474490574], [127.06371868919344, 37.4661503234869], [127.0588551029968, 37.465611780743174], [127.0559170481904, 37.4659228914077], [127.04713549385288, 37.474479419244865], [127.04345123620755, 37.48276415595109], [127.03621915098798, 37.48175802427603], [127.03372275812187, 37.48674434662411], [127.02265609299096, 37.509970106251416], [127.02038705349842, 37.51771683027875], [127.01917707838057, 37.520085205855196], [127.01397119667513, 37.52503988289669], [127.02302831890559, 37.53231899582663], [127.0269608080842, 37.53484752757724], [127.0319617044248, 37.536064291470424], [127.04806779588436, 37.52970198575087], [127.04903802830752, 37.53140496708317], [127.05116490008963, 37.52975116557232], [127.05867359288398, 37.52629974922568]]]}}"
3,FeatureCollection,"{'type': 'Feature', 'properties': {'code': '11220', 'name': '서초구', 'name_eng': 'Seocho-gu', 'base_year': '2013'}, 'geometry': {'type': 'Polygon', 'coordinates': [[[127.01397119667513, 37.52503988289669], [127.01917707838057, 37.520085205855196], [127.02038705349842, 37.51771683027875], [127.02265609299096, 37.509970106251416], [127.03372275812187, 37.48674434662411], [127.03621915098798, 37.48175802427603], [127.04345123620755, 37.48276415595109], [127.04713549385288, 37.474479419244865], [127.0559170481904, 37.4659228914077], [127.0588551029968, 37.465611780743174], [127.06371868919344, 37.4661503234869], [127.06463901956462, 37.47003474490574], [127.07135137525977, 37.47107802023145], [127.07231320371885, 37.47234914588019], [127.07476117209941, 37.47199174520626], [127.07602132306535, 37.47005021331707], [127.0802737559454, 37.471973057552624], [127.08640440578156, 37.472697935184655], [127.0866005634691, 37.47006403057779], [127.09039613625872, 37.465520545397716], [127.09712653145507, 37.460848194480654], [127.09842759318751, 37.45862253857461], [127.09673714758375, 37.45597209899094], [127.09722129576434, 37.45374822681991], [127.09575982122928, 37.45332980525459], [127.09472136159357, 37.450897902539175], [127.09293250684935, 37.450020696864506], [127.09047890749349, 37.44637473407341], [127.09046928565951, 37.44296826114185], [127.0862358725955, 37.44118543250345], [127.08441983692467, 37.4383879031398], [127.07686576585408, 37.43960712011444], [127.07375875606847, 37.43898415920535], [127.07407631675713, 37.43719357187124], [127.07666569012467, 37.43600054505559], [127.07603719210388, 37.43429107517633], [127.07361291761038, 37.43318474533595], [127.07271473569163, 37.42939553659177], [127.0733788318578, 37.42814484786288], [127.06885354151605, 37.42731815367302], [127.06778107605433, 37.426197424057314], [127.06317558623768, 37.4272916178182], [127.05998777565219, 37.4273224867045], [127.05424556064274, 37.42574929824175], [127.05197080928994, 37.42749842502397], [127.04960937636815, 37.42801020057224], [127.04849622718511, 37.430672016902065], [127.04191594772718, 37.43568906449929], [127.0379686253535, 37.43634417139204], [127.03751805596916, 37.438362795245276], [127.04031700689708, 37.44191429311459], [127.03959875976469, 37.443582700519194], [127.0398984887873, 37.44656106007936], [127.03825522385397, 37.448766467898395], [127.03916301678915, 37.45180237055558], [127.03881782597922, 37.45382039851715], [127.03695436044305, 37.45537592726508], [127.03573307034355, 37.4586703897792], [127.03683946894893, 37.46103886642786], [127.03337331972266, 37.462966775127626], [127.02820831539744, 37.455700834295826], [127.02263694708293, 37.45335816711404], [127.01827371395349, 37.4525593623189], [127.01316256500736, 37.45257906566242], [127.01110931353561, 37.45456166745922], [127.00836380369604, 37.45936868039916], [127.00738548779366, 37.459815333664274], [127.00552362663117, 37.46445102893571], [127.00008523087483, 37.46455774995882], [126.99837609897334, 37.46390918086617], [126.99932142462428, 37.46113351815481], [126.99893310307874, 37.459376062410314], [126.9953054179472, 37.45860121328987], [126.99072073195462, 37.455326143310025], [126.98956736277059, 37.457600756400446], [126.99026416700147, 37.46271603227842], [126.98896316546526, 37.465041871263544], [126.98662755598336, 37.466937278295305], [126.9846374349825, 37.46996301876212], [126.98367668291802, 37.473856492692086], [126.98500224966135, 37.49356837311327], [126.9871787157338, 37.49719505997539], [126.9832495184969, 37.49948552591205], [126.98241580381733, 37.50120029501884], [126.98223807916081, 37.509314966770326], [126.98458580602838, 37.51070333105394], [126.98948242685965, 37.5108780134613], [126.99148001917875, 37.50990503427709], [127.00011962020382, 37.513901653034374], [127.00583392114271, 37.516905128452926], [127.00818058911564, 37.51877313923874], [127.01022186960886, 37.522020085671926], [127.01397119667513, 37.52503988289669]]]}}"
4,FeatureCollection,"{'type': 'Feature', 'properties': {'code': '11210', 'name': '관악구', 'name_eng': 'Gwanak-gu', 'base_year': '2013'}, 'geometry': {'type': 'Polygon', 'coordinates': [[[126.98367668291802, 37.473856492692086], [126.9846374349825, 37.46996301876212], [126.98662755598336, 37.466937278295305], [126.98896316546526, 37.465041871263544], [126.99026416700147, 37.46271603227842], [126.98956736277059, 37.457600756400446], [126.99072073195462, 37.455326143310025], [126.98484249930785, 37.45391909788938], [126.9829408096241, 37.450206782833206], [126.97835022660695, 37.447659155806164], [126.97608193440507, 37.44478918862847], [126.9731300196836, 37.444722870088114], [126.96650852936277, 37.44276983031553], [126.96618702895445, 37.439376482995094], [126.96520439085143, 37.438249784006246], [126.9614877541633, 37.437956805629675], [126.96054904645496, 37.43673997185797], [126.95527369898224, 37.43673711968809], [126.9473688393239, 37.4347689647565], [126.94440352544498, 37.43476162120059], [126.9415292183489, 37.43315139671158], [126.94037501670272, 37.43462213966344], [126.9405640311191, 37.437501011208845], [126.9376981355065, 37.44041709605302], [126.93312955918624, 37.44290014710262], [126.93309127096236, 37.44533734785938], [126.93084408056525, 37.447382928333994], [126.92527839995981, 37.45161884570837], [126.9245243450059, 37.45392293573877], [126.91887928082078, 37.45495082787016], [126.9167728146601, 37.45490566423789], [126.91641538472182, 37.45870245071989], [126.91495285904284, 37.461166184511065], [126.91584245173756, 37.462474576247985], [126.91374656127704, 37.46375990852858], [126.91032166997253, 37.469818629944285], [126.91280966667205, 37.47083063715413], [126.91405961426707, 37.47416764846582], [126.9115784808617, 37.4753960485947], [126.91181700249076, 37.47814319736339], [126.90276666415615, 37.47652007992712], [126.90156094129895, 37.47753842789901], [126.90531975801812, 37.48218087575429], [126.90805655355825, 37.48218338568103], [126.91533979779165, 37.484392208242134], [126.91916807529428, 37.48660606817164], [126.92639563063156, 37.48715979752876], [126.92869559665061, 37.49132126714011], [126.92981699800066, 37.49218420958284], [126.93346386636452, 37.49043826776755], [126.93669800083833, 37.49026778789087], [126.93844070234584, 37.4893532861132], [126.94373156012337, 37.48938843727846], [126.94922661389508, 37.49125437495649], [126.95396955055433, 37.48955250290043], [126.9559655046206, 37.48820165625994], [126.95881175306481, 37.48874989165474], [126.96329694970828, 37.4905835370787], [126.96291787066104, 37.48803272157808], [126.96443983219191, 37.48442261322104], [126.9634428120456, 37.48067931902171], [126.9725891850662, 37.472561363278125], [126.97901795539295, 37.47376525108475], [126.98367668291802, 37.473856492692086]]]}}"


In [186]:
# 읽어들인 json 데이터가 복잡한 중첩구조로 구성된 경우
# file 객체로 json 파일을 읽은 후 json.load 함수로
# 데이터들을 메모리에 적재한 후 json_nomalize함수로
# 필요한 데이터를 지정해서 데이터프레임으로 만들어야 함


import json
from pandas import json_normalize

with open('data/seoul_geo_simple.json') as f:
    jdata = json.load(f)

jdata


seoul_geo = json_normalize(jdata['features'])
seoul_geo.head()

Unnamed: 0,type,properties.code,properties.name,properties.name_eng,properties.base_year,geometry.type,geometry.coordinates
0,Feature,11250,강동구,Gangdong-gu,2013,Polygon,"[[[127.11519584981606, 37.557533180704915], [127.11879551821994, 37.557222485451305], [127.12146867175024, 37.55986003393365], [127.12435254630417, 37.56144246249796], [127.13593925898998, 37.56564793048277], [127.14930548011061, 37.56892250303897], [127.15511020940411, 37.57093642128295], [127.16683184366129, 37.57672487388627], [127.17038810813094, 37.576465605301046], [127.17607118428914, 37.57678573961056], [127.17905504160184, 37.57791388161732], [127.17747787800164, 37.57448983055031], [127.1781775408844, 37.571481967974336], [127.17995281860672, 37.569309661290504], [127.18122821955262, 37.56636089217979], [127.18169407550688, 37.56286338914073], [127.18408792330152, 37.55814280369575], [127.18350810324185, 37.550053002101485], [127.1852644795464, 37.54888592026534], [127.18480906237207, 37.545296888806796], [127.18543378919821, 37.54260756512178], [127.18364810569703, 37.54241347907019], [127.18116465939269, 37.54384126582126], [127.17770860504257, 37.542414255164374], [127.1744373170213, 37.54277723796397], [127.16830424484573, 37.54145405702079], [127.16530984307447, 37.54221851258693], [127.15566835118616, 37.53119520531309], [127.15538075046105, 37.52652930087977], [127.15154315998161, 37.522828709496416], [127.14981542759394, 37.51926843453025], [127.14791518058246, 37.51918714979303], [127.14684644251928, 37.51661384818575], [127.14672806823502, 37.51415680680291], [127.14532023498624, 37.51464060108829], [127.12123165719615, 37.52528270089], [127.12251496040881, 37.52751810228347], [127.12532464331997, 37.53572787912298], [127.12061313033807, 37.538129867839416], [127.1116764203608, 37.540669955324965], [127.11418412219375, 37.54474592090681], [127.11600200349189, 37.55053147511706], [127.11600943681239, 37.55580061507081], [127.11519584981606, 37.557533180704915]]]"
1,Feature,11240,송파구,Songpa-gu,2013,Polygon,"[[[127.0690698130372, 37.522279423505026], [127.07496309841329, 37.52091052765938], [127.07968915919895, 37.52077294752823], [127.08639455667742, 37.52161824624356], [127.0943611414465, 37.523984206117525], [127.10087519791962, 37.524841220167055], [127.10484130265957, 37.53120327509912], [127.1116764203608, 37.540669955324965], [127.12061313033807, 37.538129867839416], [127.12532464331997, 37.53572787912298], [127.12251496040881, 37.52751810228347], [127.12123165719615, 37.52528270089], [127.14532023498624, 37.51464060108829], [127.14672806823502, 37.51415680680291], [127.14670263739373, 37.512786602955565], [127.14462782318448, 37.511529542030715], [127.14323992504048, 37.50951977457089], [127.1420864475393, 37.50578973782813], [127.14324986168657, 37.502649431479774], [127.1473517108062, 37.50069754405746], [127.14980119646964, 37.50046502392898], [127.15223804785649, 37.50170492532197], [127.15401160147654, 37.500347919909956], [127.16086308579277, 37.49886565522751], [127.1634944215765, 37.497445406097484], [127.16199885180917, 37.49402577547199], [127.16216448592424, 37.491639601211624], [127.16040295326431, 37.4877818619403], [127.15892216655034, 37.486126922469445], [127.15393282790794, 37.48483891408459], [127.15147990997852, 37.47745324805034], [127.1515017465549, 37.475633269417585], [127.14857580353349, 37.47381386382568], [127.14415938171436, 37.473692508393505], [127.14112111404233, 37.470600239054825], [127.13631568648837, 37.47214721764681], [127.13281577200672, 37.47257463763244], [127.13307493070646, 37.468907694139894], [127.13750907701846, 37.46647058226059], [127.13478085797742, 37.46509524639883], [127.1308437061496, 37.46509985661207], [127.12728991002369, 37.46673043118672], [127.12729757787379, 37.46421548908766], [127.12440571080893, 37.46240445587048], [127.12441393026374, 37.46442715236855], [127.12265007208167, 37.46756987490939], [127.11380709617507, 37.479633334849325], [127.1143875173445, 37.48073157362458], [127.11117085201238, 37.485708381512445], [127.1077937689776, 37.48860875954992], [127.10433125798602, 37.490728250649646], [127.0988509639092, 37.49302529254068], [127.08050206733888, 37.49783151325589], [127.0764808967127, 37.498612695580306], [127.0719146000724, 37.50224013587669], [127.06926628842805, 37.51717796437217], [127.06860425556381, 37.51812758676938], [127.0690698130372, 37.522279423505026]]]"
2,Feature,11230,강남구,Gangnam-gu,2013,Polygon,"[[[127.05867359288398, 37.52629974922568], [127.0690698130372, 37.522279423505026], [127.06860425556381, 37.51812758676938], [127.06926628842805, 37.51717796437217], [127.0719146000724, 37.50224013587669], [127.0764808967127, 37.498612695580306], [127.08050206733888, 37.49783151325589], [127.0988509639092, 37.49302529254068], [127.10433125798602, 37.490728250649646], [127.1077937689776, 37.48860875954992], [127.11117085201238, 37.485708381512445], [127.1143875173445, 37.48073157362458], [127.11380709617507, 37.479633334849325], [127.12265007208167, 37.46756987490939], [127.12441393026374, 37.46442715236855], [127.12440571080893, 37.46240445587048], [127.11957248720776, 37.45936217377656], [127.11885903757606, 37.45578434878651], [127.11535741803938, 37.45722556454321], [127.11413179478714, 37.45875072431525], [127.10841788934951, 37.45972888008147], [127.10561257180657, 37.456815702518746], [127.10032466845217, 37.45598440195682], [127.09842759318751, 37.45862253857461], [127.09712653145507, 37.460848194480654], [127.09039613625872, 37.465520545397716], [127.0866005634691, 37.47006403057779], [127.08640440578156, 37.472697935184655], [127.0802737559454, 37.471973057552624], [127.07602132306535, 37.47005021331707], [127.07476117209941, 37.47199174520626], [127.07231320371885, 37.47234914588019], [127.07135137525977, 37.47107802023145], [127.06463901956462, 37.47003474490574], [127.06371868919344, 37.4661503234869], [127.0588551029968, 37.465611780743174], [127.0559170481904, 37.4659228914077], [127.04713549385288, 37.474479419244865], [127.04345123620755, 37.48276415595109], [127.03621915098798, 37.48175802427603], [127.03372275812187, 37.48674434662411], [127.02265609299096, 37.509970106251416], [127.02038705349842, 37.51771683027875], [127.01917707838057, 37.520085205855196], [127.01397119667513, 37.52503988289669], [127.02302831890559, 37.53231899582663], [127.0269608080842, 37.53484752757724], [127.0319617044248, 37.536064291470424], [127.04806779588436, 37.52970198575087], [127.04903802830752, 37.53140496708317], [127.05116490008963, 37.52975116557232], [127.05867359288398, 37.52629974922568]]]"
3,Feature,11220,서초구,Seocho-gu,2013,Polygon,"[[[127.01397119667513, 37.52503988289669], [127.01917707838057, 37.520085205855196], [127.02038705349842, 37.51771683027875], [127.02265609299096, 37.509970106251416], [127.03372275812187, 37.48674434662411], [127.03621915098798, 37.48175802427603], [127.04345123620755, 37.48276415595109], [127.04713549385288, 37.474479419244865], [127.0559170481904, 37.4659228914077], [127.0588551029968, 37.465611780743174], [127.06371868919344, 37.4661503234869], [127.06463901956462, 37.47003474490574], [127.07135137525977, 37.47107802023145], [127.07231320371885, 37.47234914588019], [127.07476117209941, 37.47199174520626], [127.07602132306535, 37.47005021331707], [127.0802737559454, 37.471973057552624], [127.08640440578156, 37.472697935184655], [127.0866005634691, 37.47006403057779], [127.09039613625872, 37.465520545397716], [127.09712653145507, 37.460848194480654], [127.09842759318751, 37.45862253857461], [127.09673714758375, 37.45597209899094], [127.09722129576434, 37.45374822681991], [127.09575982122928, 37.45332980525459], [127.09472136159357, 37.450897902539175], [127.09293250684935, 37.450020696864506], [127.09047890749349, 37.44637473407341], [127.09046928565951, 37.44296826114185], [127.0862358725955, 37.44118543250345], [127.08441983692467, 37.4383879031398], [127.07686576585408, 37.43960712011444], [127.07375875606847, 37.43898415920535], [127.07407631675713, 37.43719357187124], [127.07666569012467, 37.43600054505559], [127.07603719210388, 37.43429107517633], [127.07361291761038, 37.43318474533595], [127.07271473569163, 37.42939553659177], [127.0733788318578, 37.42814484786288], [127.06885354151605, 37.42731815367302], [127.06778107605433, 37.426197424057314], [127.06317558623768, 37.4272916178182], [127.05998777565219, 37.4273224867045], [127.05424556064274, 37.42574929824175], [127.05197080928994, 37.42749842502397], [127.04960937636815, 37.42801020057224], [127.04849622718511, 37.430672016902065], [127.04191594772718, 37.43568906449929], [127.0379686253535, 37.43634417139204], [127.03751805596916, 37.438362795245276], [127.04031700689708, 37.44191429311459], [127.03959875976469, 37.443582700519194], [127.0398984887873, 37.44656106007936], [127.03825522385397, 37.448766467898395], [127.03916301678915, 37.45180237055558], [127.03881782597922, 37.45382039851715], [127.03695436044305, 37.45537592726508], [127.03573307034355, 37.4586703897792], [127.03683946894893, 37.46103886642786], [127.03337331972266, 37.462966775127626], [127.02820831539744, 37.455700834295826], [127.02263694708293, 37.45335816711404], [127.01827371395349, 37.4525593623189], [127.01316256500736, 37.45257906566242], [127.01110931353561, 37.45456166745922], [127.00836380369604, 37.45936868039916], [127.00738548779366, 37.459815333664274], [127.00552362663117, 37.46445102893571], [127.00008523087483, 37.46455774995882], [126.99837609897334, 37.46390918086617], [126.99932142462428, 37.46113351815481], [126.99893310307874, 37.459376062410314], [126.9953054179472, 37.45860121328987], [126.99072073195462, 37.455326143310025], [126.98956736277059, 37.457600756400446], [126.99026416700147, 37.46271603227842], [126.98896316546526, 37.465041871263544], [126.98662755598336, 37.466937278295305], [126.9846374349825, 37.46996301876212], [126.98367668291802, 37.473856492692086], [126.98500224966135, 37.49356837311327], [126.9871787157338, 37.49719505997539], [126.9832495184969, 37.49948552591205], [126.98241580381733, 37.50120029501884], [126.98223807916081, 37.509314966770326], [126.98458580602838, 37.51070333105394], [126.98948242685965, 37.5108780134613], [126.99148001917875, 37.50990503427709], [127.00011962020382, 37.513901653034374], [127.00583392114271, 37.516905128452926], [127.00818058911564, 37.51877313923874], [127.01022186960886, 37.522020085671926], [127.01397119667513, 37.52503988289669]]]"
4,Feature,11210,관악구,Gwanak-gu,2013,Polygon,"[[[126.98367668291802, 37.473856492692086], [126.9846374349825, 37.46996301876212], [126.98662755598336, 37.466937278295305], [126.98896316546526, 37.465041871263544], [126.99026416700147, 37.46271603227842], [126.98956736277059, 37.457600756400446], [126.99072073195462, 37.455326143310025], [126.98484249930785, 37.45391909788938], [126.9829408096241, 37.450206782833206], [126.97835022660695, 37.447659155806164], [126.97608193440507, 37.44478918862847], [126.9731300196836, 37.444722870088114], [126.96650852936277, 37.44276983031553], [126.96618702895445, 37.439376482995094], [126.96520439085143, 37.438249784006246], [126.9614877541633, 37.437956805629675], [126.96054904645496, 37.43673997185797], [126.95527369898224, 37.43673711968809], [126.9473688393239, 37.4347689647565], [126.94440352544498, 37.43476162120059], [126.9415292183489, 37.43315139671158], [126.94037501670272, 37.43462213966344], [126.9405640311191, 37.437501011208845], [126.9376981355065, 37.44041709605302], [126.93312955918624, 37.44290014710262], [126.93309127096236, 37.44533734785938], [126.93084408056525, 37.447382928333994], [126.92527839995981, 37.45161884570837], [126.9245243450059, 37.45392293573877], [126.91887928082078, 37.45495082787016], [126.9167728146601, 37.45490566423789], [126.91641538472182, 37.45870245071989], [126.91495285904284, 37.461166184511065], [126.91584245173756, 37.462474576247985], [126.91374656127704, 37.46375990852858], [126.91032166997253, 37.469818629944285], [126.91280966667205, 37.47083063715413], [126.91405961426707, 37.47416764846582], [126.9115784808617, 37.4753960485947], [126.91181700249076, 37.47814319736339], [126.90276666415615, 37.47652007992712], [126.90156094129895, 37.47753842789901], [126.90531975801812, 37.48218087575429], [126.90805655355825, 37.48218338568103], [126.91533979779165, 37.484392208242134], [126.91916807529428, 37.48660606817164], [126.92639563063156, 37.48715979752876], [126.92869559665061, 37.49132126714011], [126.92981699800066, 37.49218420958284], [126.93346386636452, 37.49043826776755], [126.93669800083833, 37.49026778789087], [126.93844070234584, 37.4893532861132], [126.94373156012337, 37.48938843727846], [126.94922661389508, 37.49125437495649], [126.95396955055433, 37.48955250290043], [126.9559655046206, 37.48820165625994], [126.95881175306481, 37.48874989165474], [126.96329694970828, 37.4905835370787], [126.96291787066104, 37.48803272157808], [126.96443983219191, 37.48442261322104], [126.9634428120456, 37.48067931902171], [126.9725891850662, 37.472561363278125], [126.97901795539295, 37.47376525108475], [126.98367668291802, 37.473856492692086]]]"


## 웹 페이지에서 table 태그를 데이터프레임으로 만들기
* read_html

In [230]:
# hanbit.co.kr의 store 메뉴의 전체도서목록을 데이터프레임으로 만들기

import requests
from bs4 import BeautifulSoup

In [231]:
url = 'https://www.hanbit.co.kr/store/books/full_book_list.html'
headers = {'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36' }

In [259]:
res = requests.get(url, headers=headers)

tables = pd.read_html(res.text)
tables[1]

Unnamed: 0,브랜드,도서명,저자,발행일,정가
0,한빛비즈,비겁한 돈,황현희 외 1명,2021-11-08,"16,000원"
1,한빛미디어,구글 BERT의 정석,수다르산 라비찬디란,2021-11-03,"34,000원"
2,한빛미디어,머신러닝 디자인 패턴,발리아파 락슈마난 외 2명,2021-11-01,"38,000원"
3,한빛미디어,소프트웨어 아키텍처 101,마크 리처즈 외 1명,2021-11-01,"32,000원"
4,한빛미디어,혼자 공부하는 SQL,우재남,2021-11-01,"24,000원"
5,한빛에듀,똑똑한 두뇌 연습 : 놀이공원 미로찾기,권나영,2021-10-28,"7,500원"
6,한빛미디어,한 권으로 다지는 머신러닝&딥러닝 with 파이썬,알베르토 아르타산체스 외 1명,2021-10-21,"40,000원"
7,한빛아카데미,"IT CookBook, 시스템 프로그래밍: 리눅스&유닉스",이종원,2021-10-15,"28,000원"
8,한빛미디어,살아 움직이는 머신러닝 파이프라인 설계,하네스 하프케 외 1명,2021-10-11,"32,000원"
9,한빛미디어,시험장에 몰래 가져갈 이경오의 SQL+SQLD 비밀노트,이경오,2021-10-10,"32,000원"


In [191]:
tables[1].info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50 entries, 0 to 49
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   브랜드     50 non-null     object
 1   도서명     50 non-null     object
 2   저자      50 non-null     object
 3   발행일     50 non-null     object
 4   정가      50 non-null     object
dtypes: object(5)
memory usage: 2.1+ KB


In [217]:
# yes24.co.kr의 베스트셀러 페이지의 도서목록을 read_html 함수로 추출해보세요

url = 'http://www.yes24.com/24/Category/BestSeller.html'

res = requests.get(url, headers=headers)

In [219]:
# pd.read_html(res.text)

# html 소스내에 tabler 태그가 불완전하게 작성되었기 때문에 read_html함수로 데이터를 가져올 수 없음

In [221]:
# 데이터프레임을 파일로 저장하기
# to_xxx 함수 이용

In [260]:
# 앞서 생성한 managership 데이터프레임을 csv파일로 저장함

managership.to_csv('managers.csv', index=False)

In [251]:
# 앞서 만든 전체도서목록에 대한 데이터프레임을 csv파일로 저장해보세요 (hanbboks.csv)

tables[1].to_csv('hanbboks.csv', index=False)