## DataFrame 사용해 보기

In [1]:
import pandas as pd

In [2]:
# Each inner list is one row of data: a name followed by two integer scores.
two_dimensional_list = [['dongwook', 50, 86], ['sineui', 89, 31], ['ikjoong', 68, 91], ['yoonsoo', 88, 75]]

In [6]:
# Build the DataFrame with explicit column names and custom row labels 'a'..'d'
# (without `index=` the rows would be labelled 0, 1, 2, ...).
my_df = pd.DataFrame(two_dimensional_list, columns=['name', 'english_score', 'math_score'], index=['a', 'b', 'c', 'd'])
my_df

Unnamed: 0,name,english_score,math_score
a,dongwook,50,86
b,sineui,89,31
c,ikjoong,68,91
d,yoonsoo,88,75


In [5]:
# Check the class of the object returned by pd.DataFrame(...).
type(my_df)

pandas.core.frame.DataFrame

In [7]:
# Each column has its own dtype: the text column is `object`, the score columns are `int64`.
my_df.dtypes

name             object
english_score     int64
math_score        int64
dtype: object

---
## DataFrame을 만드는 다양한 방법

### From list of lists, array of arrays, list of series

In [8]:
import numpy as np
import pandas as pd

two_dimensional_list = [['dongwook', 50, 86], ['sineui', 89, 31], ['ikjoong', 68, 91], ['yoonsoo', 88, 75]]
# dtype=object keeps every element as the Python object it already is.
# Without it NumPy coerces the mixed str/int rows to a single string dtype,
# so the scores would silently become '50', '86', ... instead of 50, 86, ...
two_dimensional_array = np.array(two_dimensional_list, dtype=object)
list_of_series = [
    pd.Series(['dongwook', 50, 86]),
    pd.Series(['sineui', 89, 31]),
    pd.Series(['ikjoong', 68, 91]),
    pd.Series(['yoonsoo', 88, 75])
]

# All three inputs are row-oriented: each inner list / array row / Series
# becomes one DataFrame row, and the three frames hold the same values.
# No column names are given, so the columns are labelled 0, 1, 2.
df1 = pd.DataFrame(two_dimensional_list)
df2 = pd.DataFrame(two_dimensional_array)
df3 = pd.DataFrame(list_of_series)

print(df1)

          0   1   2
0  dongwook  50  86
1    sineui  89  31
2   ikjoong  68  91
3   yoonsoo  88  75


### From dict of lists, dict of arrays, dict of series

In [9]:
import numpy as np
import pandas as pd

# Column data shared by the three dictionaries below.
names = ['dongwook', 'sineui', 'ikjoong', 'yoonsoo']
english_scores = [50, 89, 68, 88]
math_scores = [86, 31, 91, 75]

# dict of lists
dict1 = dict(name=names, english_score=english_scores, math_score=math_scores)

# dict of arrays
dict2 = dict(
    name=np.array(names),
    english_score=np.array(english_scores),
    math_score=np.array(math_scores),
)

# dict of Series
dict3 = dict(
    name=pd.Series(names),
    english_score=pd.Series(english_scores),
    math_score=pd.Series(math_scores),
)

# Every dictionary maps a column name to that column's values,
# so the three frames contain the same rows and columns.
df1, df2, df3 = (pd.DataFrame(column_data) for column_data in (dict1, dict2, dict3))

print(df1)

       name  english_score  math_score
0  dongwook             50          86
1    sineui             89          31
2   ikjoong             68          91
3   yoonsoo             88          75


### From list of dicts

In [10]:
import numpy as np
import pandas as pd

# One dict per row: the keys become the column names and each dict becomes one row.
my_list = [
    dict(name='dongwook', english_score=50, math_score=86),
    dict(name='sineui', english_score=89, math_score=31),
    dict(name='ikjoong', english_score=68, math_score=91),
    dict(name='yoonsoo', english_score=88, math_score=75),
]

df = pd.DataFrame(my_list)
print(df)

       name  english_score  math_score
0  dongwook             50          86
1    sineui             89          31
2   ikjoong             68          91
3   yoonsoo             88          75


---
## 스타들의 생일은 언제?

In [12]:
import pandas as pd

# Write your code here.
# One list per column; position i in every list describes the same person.
names = [
    'Taylor Swift',
    'Aaron Sorkin',
    'Harry Potter',
    'Ji-Sung Park',
]
birthdays = [
    'December 13, 1989',
    'June 9, 1961',
    'July 31, 1980',
    'February 25, 1981',
]
occupations = [
    'Singer-songwriter',
    'Screenwriter',
    'Wizard',
    'Footballer',
]

# Column name -> column values.
star_dict = dict(name=names, birthday=birthdays, occupation=occupations)

df = pd.DataFrame(star_dict)
# Show the answer
df

Unnamed: 0,name,birthday,occupation
0,Taylor Swift,"December 13, 1989",Singer-songwriter
1,Aaron Sorkin,"June 9, 1961",Screenwriter
2,Harry Potter,"July 31, 1980",Wizard
3,Ji-Sung Park,"February 25, 1981",Footballer


---
## pandas로 데이터 읽어들이기

In [16]:
iphone_df = pd.read_csv('data/iphone.csv', index_col=0)
# If the CSV has no header row, use: iphone_df = pd.read_csv('data/iphone.csv', header=None)
# index_col=0 : the column at position 0 is used as the label (index) of each row

In [17]:
iphone_df

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID
iPhone 7,2016-09-16,4.7,2GB,iOS 10.0,No
iPhone 7 Plus,2016-09-16,5.5,3GB,iOS 10.0,No
iPhone 8,2017-09-22,4.7,2GB,iOS 11.0,No
iPhone 8 Plus,2017-09-22,5.5,3GB,iOS 11.0,No
iPhone X,2017-11-03,5.8,3GB,iOS 11.1,Yes
iPhone XS,2018-09-21,5.8,4GB,iOS 12.0,Yes
iPhone XS Max,2018-09-21,6.5,4GB,iOS 12.0,Yes


In [15]:
# Check the class of the object returned by pd.read_csv.
type(iphone_df)

pandas.core.frame.DataFrame

---
## 가장 인기 있는 아기 이름은?

In [18]:
import pandas as pd

# Write your code here.
# No index_col is passed, so the rows get the default 0-based integer index.
df = pd.read_csv('data/popular_baby_names.csv')
# Show the answer
df

Unnamed: 0,Year of Birth,Gender,Ethnicity,Child's First Name,Count,Rank
0,2016,FEMALE,ASIAN AND PACIFIC ISLANDER,Olivia,172,1
1,2016,FEMALE,ASIAN AND PACIFIC ISLANDER,Chloe,112,2
2,2016,FEMALE,ASIAN AND PACIFIC ISLANDER,Sophia,104,3
3,2016,FEMALE,ASIAN AND PACIFIC ISLANDER,Emily,99,4
4,2016,FEMALE,ASIAN AND PACIFIC ISLANDER,Emma,99,4
...,...,...,...,...,...,...
11340,2011,MALE,WHITE NON HISPANIC,LEV,10,97
11341,2011,MALE,WHITE NON HISPANIC,AUGUSTUS,10,97
11342,2011,MALE,WHITE NON HISPANIC,SHAUL,10,97
11343,2011,MALE,WHITE NON HISPANIC,WESLEY,10,97


---
## 메가밀리언 로또 당첨 번호

In [19]:
import pandas as pd

# Write your code here.
# index_col=0 turns the first CSV column ('Draw Date') into the row index.
df = pd.read_csv('data/mega_millions.csv', index_col=0)
# Show the answer
df

Unnamed: 0_level_0,Winning Numbers,Mega Ball,Multiplier
Draw Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
02/15/2019,10 38 40 43 65,12,2.0
02/12/2019,15 32 39 50 65,7,3.0
02/08/2019,14 24 31 42 48,13,3.0
02/05/2019,03 34 36 59 66,7,2.0
02/01/2019,02 37 48 66 68,11,5.0
...,...,...,...
05/31/2002,12 28 45 46 52,47,
05/28/2002,06 21 22 29 32,24,
05/24/2002,02 04 32 44 52,36,
05/21/2002,04 28 39 41 44,9,


---
## DataFrame 인덱싱 1

In [1]:
import pandas as pd

In [2]:
# Load the table with the first CSV column (the model names) as the row index.
iphone_df = pd.read_csv('data/iphone.csv', index_col=0)
iphone_df

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID
iPhone 7,2016-09-16,4.7,2GB,iOS 10.0,No
iPhone 7 Plus,2016-09-16,5.5,3GB,iOS 10.0,No
iPhone 8,2017-09-22,4.7,2GB,iOS 11.0,No
iPhone 8 Plus,2017-09-22,5.5,3GB,iOS 11.0,No
iPhone X,2017-11-03,5.8,3GB,iOS 11.1,Yes
iPhone XS,2018-09-21,5.8,4GB,iOS 12.0,Yes
iPhone XS Max,2018-09-21,6.5,4GB,iOS 12.0,Yes


In [4]:
# .loc[row_label, column_label] returns the single value in that cell.
iphone_df.loc['iPhone 8', '메모리']

'2GB'

In [5]:
# ':' in the column position means "every column": the whole 'iPhone X' row.
iphone_df.loc['iPhone X', :]

출시일        2017-11-03
디스플레이             5.8
메모리               3GB
출시 버전        iOS 11.1
Face ID           Yes
Name: iPhone X, dtype: object

In [6]:
# Shorter form: with only a row label, .loc returns that entire row.
iphone_df.loc['iPhone X']

출시일        2017-11-03
디스플레이             5.8
메모리               3GB
출시 버전        iOS 11.1
Face ID           Yes
Name: iPhone X, dtype: object

In [7]:
# A single row is returned as a Series.
type(iphone_df.loc['iPhone X'])

pandas.core.series.Series

In [8]:
# ':' in the row position means "every row": the whole '출시일' column.
iphone_df.loc[:, '출시일']

iPhone 7         2016-09-16
iPhone 7 Plus    2016-09-16
iPhone 8         2017-09-22
iPhone 8 Plus    2017-09-22
iPhone X         2017-11-03
iPhone XS        2018-09-21
iPhone XS Max    2018-09-21
Name: 출시일, dtype: object

In [9]:
# Shorter form for selecting one column: df[column_label].
iphone_df['출시일']

iPhone 7         2016-09-16
iPhone 7 Plus    2016-09-16
iPhone 8         2017-09-22
iPhone 8 Plus    2017-09-22
iPhone X         2017-11-03
iPhone XS        2018-09-21
iPhone XS Max    2018-09-21
Name: 출시일, dtype: object

In [10]:
# A single column is also returned as a Series.
type(iphone_df['출시일'])

pandas.core.series.Series

---
## 방송사 시청률 받아오기 1

In [15]:
import pandas as pd

# The first CSV column (the year) becomes the row index, so rows are looked up by year.
df = pd.read_csv('data/broadcast.csv', index_col=0)

df.loc[2016, 'KBS'] # KBS viewer rating for 2016

27.583000000000002

---
## 방송사 시청률 받아오기 2

In [16]:
import pandas as pd

# The first CSV column (the year) becomes the row index.
df = pd.read_csv('data/broadcast.csv', index_col=0)

df['JTBC'] # JTBC viewer ratings (one value per year)

2011    7.380
2012    7.878
2013    7.810
2014    7.490
2015    7.267
2016    7.727
2017    9.453
Name: JTBC, dtype: float64

---
## 방송사 시청률 받아오기 3

In [21]:
import pandas as pd

# The first CSV column (the year) becomes the row index.
df = pd.read_csv('data/broadcast.csv', index_col=0)

df.loc[:, ['SBS','JTBC']] # df[['SBS', 'JTBC']] gives the same result

Unnamed: 0,SBS,JTBC
2011,11.173,7.38
2012,11.408,7.878
2013,9.673,7.81
2014,9.108,7.49
2015,9.099,7.267
2016,8.669,7.727
2017,8.661,9.453


---
## 카드사 고객 분석

In [22]:
import pandas as pd

# Load one spending table per card company; no index_col, so rows are numbered 0, 1, 2, ...
samsong_df = pd.read_csv('data/samsong.csv')
hyundee_df = pd.read_csv('data/hyundee.csv')

In [23]:
samsong_df

Unnamed: 0,요일,식비,교통비,문화생활비,기타
0,MON,19420,2560,4308,3541
1,TUE,16970,2499,7644,2903
2,WED,15091,2511,5674,2015
3,THU,17880,2545,8621,3012
4,FRI,27104,2993,23052,2508
5,SAT,29055,2803,15330,4901
6,SUN,23509,1760,19030,4230


In [24]:
hyundee_df

Unnamed: 0,요일,식비,교통비,문화생활비,기타
0,MON,22420,2574,5339,5546
1,TUE,19940,2689,3524,2501
2,WED,18086,2281,5364,2234
3,THU,18863,2155,9942,3252
4,FRI,35144,2463,33511,2342
5,SAT,34952,2812,19397,4324
6,SUN,28513,2680,19925,4577


In [25]:
# Pull out the day-of-week column ('요일') once, and the culture/leisure
# spending column ('문화생활비') from each company's table.
days = samsong_df['요일']
samsongs = samsong_df['문화생활비']
hyundees = hyundee_df['문화생활비']

In [26]:
# Combine the three Series into one table; the dict keys become the column names.
person_dict = {
    'day': days,
    'samsong': samsongs,
    'hyundee': hyundees
}

df = pd.DataFrame(person_dict)

In [27]:
df

Unnamed: 0,day,samsong,hyundee
0,MON,4308,5339
1,TUE,7644,3524
2,WED,5674,5364
3,THU,8621,9942
4,FRI,23052,33511
5,SAT,15330,19397
6,SUN,19030,19925


---
## DataFrame 인덱싱 2

In [28]:
import pandas as pd

In [29]:
# Load the table with the first CSV column (the model names) as the row index.
iphone_df = pd.read_csv('data/iphone.csv', index_col=0)
iphone_df

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID
iPhone 7,2016-09-16,4.7,2GB,iOS 10.0,No
iPhone 7 Plus,2016-09-16,5.5,3GB,iOS 10.0,No
iPhone 8,2017-09-22,4.7,2GB,iOS 11.0,No
iPhone 8 Plus,2017-09-22,5.5,3GB,iOS 11.0,No
iPhone X,2017-11-03,5.8,3GB,iOS 11.1,Yes
iPhone XS,2018-09-21,5.8,4GB,iOS 12.0,Yes
iPhone XS Max,2018-09-21,6.5,4GB,iOS 12.0,Yes


In [30]:
# One row label -> that row, returned as a Series.
iphone_df.loc['iPhone X']

출시일        2017-11-03
디스플레이             5.8
메모리               3GB
출시 버전        iOS 11.1
Face ID           Yes
Name: iPhone X, dtype: object

In [32]:
# A list of row labels selects several rows, in the order the labels are listed.
iphone_df.loc[['iPhone X', 'iPhone 8']]

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID
iPhone X,2017-11-03,5.8,3GB,iOS 11.1,Yes
iPhone 8,2017-09-22,4.7,2GB,iOS 11.0,No


In [33]:
# Selecting with a list of labels returns a DataFrame rather than a Series.
type(iphone_df.loc[['iPhone X', 'iPhone 8']])

pandas.core.frame.DataFrame

In [34]:
# One column label -> that column, returned as a Series.
iphone_df['Face ID']

iPhone 7          No
iPhone 7 Plus     No
iPhone 8          No
iPhone 8 Plus     No
iPhone X         Yes
iPhone XS        Yes
iPhone XS Max    Yes
Name: Face ID, dtype: object

In [35]:
# A list of column labels -> a DataFrame with the columns in the listed order.
iphone_df[['Face ID', '출시일', '메모리']]

Unnamed: 0,Face ID,출시일,메모리
iPhone 7,No,2016-09-16,2GB
iPhone 7 Plus,No,2016-09-16,3GB
iPhone 8,No,2017-09-22,2GB
iPhone 8 Plus,No,2017-09-22,3GB
iPhone X,Yes,2017-11-03,3GB
iPhone XS,Yes,2018-09-21,4GB
iPhone XS Max,Yes,2018-09-21,4GB


In [36]:
# Label slices with .loc include BOTH endpoints: 'iPhone XS' is part of the result.
iphone_df.loc['iPhone 8':'iPhone XS']

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID
iPhone 8,2017-09-22,4.7,2GB,iOS 11.0,No
iPhone 8 Plus,2017-09-22,5.5,3GB,iOS 11.0,No
iPhone X,2017-11-03,5.8,3GB,iOS 11.1,Yes
iPhone XS,2018-09-21,5.8,4GB,iOS 12.0,Yes


In [37]:
# Omitting the start label slices from the first row through 'iPhone XS'.
iphone_df.loc[:'iPhone XS']

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID
iPhone 7,2016-09-16,4.7,2GB,iOS 10.0,No
iPhone 7 Plus,2016-09-16,5.5,3GB,iOS 10.0,No
iPhone 8,2017-09-22,4.7,2GB,iOS 11.0,No
iPhone 8 Plus,2017-09-22,5.5,3GB,iOS 11.0,No
iPhone X,2017-11-03,5.8,3GB,iOS 11.1,Yes
iPhone XS,2018-09-21,5.8,4GB,iOS 12.0,Yes


In [38]:
# Pitfall: a slice inside df[...] is applied to ROWS, not columns.
# '메모리' and 'Face ID' are column names, so here no rows are selected and the result is empty.
iphone_df['메모리':'Face ID']

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID


In [39]:
# To slice columns by label, use .loc with ':' in the row position.
iphone_df.loc[:, '메모리':'Face ID']

Unnamed: 0,메모리,출시 버전,Face ID
iPhone 7,2GB,iOS 10.0,No
iPhone 7 Plus,3GB,iOS 10.0,No
iPhone 8,2GB,iOS 11.0,No
iPhone 8 Plus,3GB,iOS 11.0,No
iPhone X,3GB,iOS 11.1,Yes
iPhone XS,4GB,iOS 12.0,Yes
iPhone XS Max,4GB,iOS 12.0,Yes


In [40]:
# A row label slice and a column label slice can be combined in one .loc call.
iphone_df.loc['iPhone 7':'iPhone X', '메모리':'Face ID']

Unnamed: 0,메모리,출시 버전,Face ID
iPhone 7,2GB,iOS 10.0,No
iPhone 7 Plus,3GB,iOS 10.0,No
iPhone 8,2GB,iOS 11.0,No
iPhone 8 Plus,3GB,iOS 11.0,No
iPhone X,3GB,iOS 11.1,Yes


---
## 방송사 시청률 받아오기 4

In [41]:
import pandas as pd

# Load the ratings table; the first CSV column (the year) becomes the row index.
df = pd.read_csv('data/broadcast.csv', index_col=0)

df

Unnamed: 0,KBS,MBC,SBS,TV CHOSUN,JTBC,Channel A,MBN
2011,35.951,18.374,11.173,9.102,7.38,3.771,2.809
2012,36.163,16.022,11.408,8.785,7.878,5.874,3.31
2013,31.989,16.778,9.673,9.026,7.81,5.35,3.825
2014,31.21,15.663,9.108,9.44,7.49,5.776,4.572
2015,27.777,16.573,9.099,9.94,7.267,6.678,5.52
2016,27.583,14.982,8.669,9.829,7.727,6.624,5.477
2017,26.89,12.465,8.661,8.886,9.453,6.056,5.215


In [43]:
# Years 2012 through 2017 (both included), columns from 'KBS' through 'SBS'.
df.loc[2012:2017, 'KBS':'SBS']

Unnamed: 0,KBS,MBC,SBS
2012,36.163,16.022,11.408
2013,31.989,16.778,9.673
2014,31.21,15.663,9.108
2015,27.777,16.573,9.099
2016,27.583,14.982,8.669
2017,26.89,12.465,8.661


---
## DataFrame 조건으로 인덱싱

In [44]:
import pandas as pd

In [45]:
# Load the table with the first CSV column (the model names) as the row index.
iphone_df = pd.read_csv('data/iphone.csv', index_col=0)
iphone_df

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID
iPhone 7,2016-09-16,4.7,2GB,iOS 10.0,No
iPhone 7 Plus,2016-09-16,5.5,3GB,iOS 10.0,No
iPhone 8,2017-09-22,4.7,2GB,iOS 11.0,No
iPhone 8 Plus,2017-09-22,5.5,3GB,iOS 11.0,No
iPhone X,2017-11-03,5.8,3GB,iOS 11.1,Yes
iPhone XS,2018-09-21,5.8,4GB,iOS 12.0,Yes
iPhone XS Max,2018-09-21,6.5,4GB,iOS 12.0,Yes


In [46]:
# A list of booleans (one flag per row) keeps only the rows whose flag is True.
iphone_df.loc[[True, False, True, True, False, True, False]]

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID
iPhone 7,2016-09-16,4.7,2GB,iOS 10.0,No
iPhone 8,2017-09-22,4.7,2GB,iOS 11.0,No
iPhone 8 Plus,2017-09-22,5.5,3GB,iOS 11.0,No
iPhone XS,2018-09-21,5.8,4GB,iOS 12.0,Yes


In [48]:
# Same technique with a different mask: only the 1st and 4th rows are kept.
iphone_df.loc[[True, False, False, True, False, False, False]]

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID
iPhone 7,2016-09-16,4.7,2GB,iOS 10.0,No
iPhone 8 Plus,2017-09-22,5.5,3GB,iOS 11.0,No


In [49]:
# A boolean list in the column position (one flag per column) filters columns instead.
iphone_df.loc[:, [True, False, False, True, False]]

Unnamed: 0,출시일,출시 버전
iPhone 7,2016-09-16,iOS 10.0
iPhone 7 Plus,2016-09-16,iOS 10.0
iPhone 8,2017-09-22,iOS 11.0
iPhone 8 Plus,2017-09-22,iOS 11.0
iPhone X,2017-11-03,iOS 11.1
iPhone XS,2018-09-21,iOS 12.0
iPhone XS Max,2018-09-21,iOS 12.0


In [50]:
# Comparing a column with a scalar yields a boolean Series: True where the display value exceeds 5.
iphone_df['디스플레이'] > 5

iPhone 7         False
iPhone 7 Plus     True
iPhone 8         False
iPhone 8 Plus     True
iPhone X          True
iPhone XS         True
iPhone XS Max     True
Name: 디스플레이, dtype: bool

In [51]:
# Passing that boolean Series to .loc keeps only the rows where it is True.
iphone_df.loc[iphone_df['디스플레이'] > 5]

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID
iPhone 7 Plus,2016-09-16,5.5,3GB,iOS 10.0,No
iPhone 8 Plus,2017-09-22,5.5,3GB,iOS 11.0,No
iPhone X,2017-11-03,5.8,3GB,iOS 11.1,Yes
iPhone XS,2018-09-21,5.8,4GB,iOS 12.0,Yes
iPhone XS Max,2018-09-21,6.5,4GB,iOS 12.0,Yes


In [52]:
# Equality comparison works the same way: True for rows whose 'Face ID' is 'Yes'.
iphone_df['Face ID'] == 'Yes'

iPhone 7         False
iPhone 7 Plus    False
iPhone 8         False
iPhone 8 Plus    False
iPhone X          True
iPhone XS         True
iPhone XS Max     True
Name: Face ID, dtype: bool

In [53]:
# Keep only the rows whose 'Face ID' value is 'Yes'.
iphone_df.loc[iphone_df['Face ID'] == 'Yes']

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID
iPhone X,2017-11-03,5.8,3GB,iOS 11.1,Yes
iPhone XS,2018-09-21,5.8,4GB,iOS 12.0,Yes
iPhone XS Max,2018-09-21,6.5,4GB,iOS 12.0,Yes


In [54]:
# Combine two boolean Series element-wise with &. The parentheses are required
# because & binds more tightly than the comparison operators > and ==.
(iphone_df['디스플레이'] > 5) & (iphone_df['Face ID'] == 'Yes')  # AND

iPhone 7         False
iPhone 7 Plus    False
iPhone 8         False
iPhone 8 Plus    False
iPhone X          True
iPhone XS         True
iPhone XS Max     True
dtype: bool

In [55]:
# Store the combined boolean mask in a variable so it can be reused as a row filter.
condition = (iphone_df['디스플레이'] > 5) & (iphone_df['Face ID'] == 'Yes')

In [56]:
# Filter the rows with the stored mask.
iphone_df.loc[condition]

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID
iPhone X,2017-11-03,5.8,3GB,iOS 11.1,Yes
iPhone XS,2018-09-21,5.8,4GB,iOS 12.0,Yes
iPhone XS Max,2018-09-21,6.5,4GB,iOS 12.0,Yes


In [57]:
 (iphone_df['디스플레이'] > 5) | (iphone_df['Face ID'] == 'Yes')  # OR

iPhone 7         False
iPhone 7 Plus     True
iPhone 8         False
iPhone 8 Plus     True
iPhone X          True
iPhone XS         True
iPhone XS Max     True
dtype: bool

---
## 방송사 시청률 받아오기 5

In [58]:
import pandas as pd

# Load the ratings table; the first CSV column (the year) becomes the row index.
df = pd.read_csv('data/broadcast.csv', index_col=0)

df

Unnamed: 0,KBS,MBC,SBS,TV CHOSUN,JTBC,Channel A,MBN
2011,35.951,18.374,11.173,9.102,7.38,3.771,2.809
2012,36.163,16.022,11.408,8.785,7.878,5.874,3.31
2013,31.989,16.778,9.673,9.026,7.81,5.35,3.825
2014,31.21,15.663,9.108,9.44,7.49,5.776,4.572
2015,27.777,16.573,9.099,9.94,7.267,6.678,5.52
2016,27.583,14.982,8.669,9.829,7.727,6.624,5.477
2017,26.89,12.465,8.661,8.886,9.453,6.056,5.215


In [59]:
# Boolean Series: True for the years in which the KBS value exceeded 30.
df['KBS'] > 30

2011     True
2012     True
2013     True
2014     True
2015    False
2016    False
2017    False
Name: KBS, dtype: bool

In [61]:
# Boolean mask selects the rows, the label selects the column:
# the KBS values for just the years where KBS exceeded 30.
df.loc[df['KBS'] > 30, 'KBS']

2011    35.951
2012    36.163
2013    31.989
2014    31.210
Name: KBS, dtype: float64

---
## 방송사 시청률 받아오기 6

In [62]:
import pandas as pd

# Load the ratings table; the first CSV column (the year) becomes the row index.
df = pd.read_csv('data/broadcast.csv', index_col=0)

df

Unnamed: 0,KBS,MBC,SBS,TV CHOSUN,JTBC,Channel A,MBN
2011,35.951,18.374,11.173,9.102,7.38,3.771,2.809
2012,36.163,16.022,11.408,8.785,7.878,5.874,3.31
2013,31.989,16.778,9.673,9.026,7.81,5.35,3.825
2014,31.21,15.663,9.108,9.44,7.49,5.776,4.572
2015,27.777,16.573,9.099,9.94,7.267,6.678,5.52
2016,27.583,14.982,8.669,9.829,7.727,6.624,5.477
2017,26.89,12.465,8.661,8.886,9.453,6.056,5.215


In [63]:
# Comparing two columns row by row yields a boolean Series (True where SBS is lower).
df['SBS'] < df['TV CHOSUN']

2011    False
2012    False
2013    False
2014     True
2015     True
2016     True
2017     True
dtype: bool

In [64]:
# Years in which SBS was below TV CHOSUN, showing only those two columns.
df.loc[df['SBS'] < df['TV CHOSUN'], ['SBS', 'TV CHOSUN']]

Unnamed: 0,SBS,TV CHOSUN
2014,9.108,9.44
2015,9.099,9.94
2016,8.669,9.829
2017,8.661,8.886


---
## DataFrame 위치로 인덱싱하기

In [65]:
import pandas as pd

In [66]:
# Load the table with the first CSV column (the model names) as the row index.
iphone_df = pd.read_csv('data/iphone.csv', index_col=0)
iphone_df

Unnamed: 0,출시일,디스플레이,메모리,출시 버전,Face ID
iPhone 7,2016-09-16,4.7,2GB,iOS 10.0,No
iPhone 7 Plus,2016-09-16,5.5,3GB,iOS 10.0,No
iPhone 8,2017-09-22,4.7,2GB,iOS 11.0,No
iPhone 8 Plus,2017-09-22,5.5,3GB,iOS 11.0,No
iPhone X,2017-11-03,5.8,3GB,iOS 11.1,Yes
iPhone XS,2018-09-21,5.8,4GB,iOS 12.0,Yes
iPhone XS Max,2018-09-21,6.5,4GB,iOS 12.0,Yes


In [67]:
# .iloc uses zero-based integer positions instead of labels: row 2, column 4.
iphone_df.iloc[2, 4]

'No'

In [68]:
# Lists of positions: rows 1 and 3, columns 1 and 4.
iphone_df.iloc[[1, 3], [1, 4]]

Unnamed: 0,디스플레이,Face ID
iPhone 7 Plus,5.5,No
iPhone 8 Plus,5.5,No


In [69]:
# Position slices exclude the end position: rows 3 to the last, columns 1, 2 and 3.
iphone_df.iloc[3:, 1:4]

Unnamed: 0,디스플레이,메모리,출시 버전
iPhone 8 Plus,5.5,3GB,iOS 11.0
iPhone X,5.8,3GB,iOS 11.1
iPhone XS,5.8,4GB,iOS 12.0
iPhone XS Max,6.5,4GB,iOS 12.0


---
## DataFrame 인덱싱 문법 정리
<br>

| 이름으로 인덱싱하기           | 기본 형태                             | 단축 형태                      |
| ----------------------------- | ------------------------------------- | ------------------------------ |
| 하나의 row 이름               | `df.loc["row4"]`                      |                                |
| row 이름의 리스트             | `df.loc[["row4", "row5", "row3"]]`    |                                |
| row 이름의 리스트 슬라이싱    | `df.loc["row2":"row5"]`               | `df["row2":"row5"]`            |
| 하나의 column 이름            | `df.loc[:, "col1"]`                   | `df["col1"]`                   |
| column 이름의 리스트          | `df.loc[:, ["col4", "col6", "col3"]]` | `df[["col4", "col6", "col3"]]` |
| column 이름의 리스트 슬라이싱 | `df.loc[:, "col2":"col5"]`            |                                |



| 위치로 인덱싱하기             | 기본 형태               | 단축 형태 |
| ----------------------------- | ----------------------- | --------- |
| 하나의 row 위치               | `df.iloc[8]`            |           |
| row 위치의 리스트             | `df.iloc[[4, 5, 3]]`    |           |
| row 위치의 리스트 슬라이싱    | `df.iloc[2:5]`          | `df[2:5]` |
| 하나의 column 위치            | `df.iloc[:, 3]`         |           |
| column 위치의 리스트          | `df.iloc[:, [3, 5, 6]]` |           |
| column 위치의 리스트 슬라이싱 | `df.iloc[:, 3:7]`       |           |