# 4장 판다스 Index 클래스 이해하기

- 이 장에서는 인덱스를 검색하기 위한 다양한 방법을 알아봅니다.  
- 시리즈나 데이터프레임 안의 행과 열의 정보를 관리하는 레이블을 만드는 방법을 알아봅니다.  
- 숫자, 날짜, 문자열, 범주 등으로 구성된 단일 레이블 체계와 복수 레이블 체계가 어떻게 만들어지고 작동하는지 알아봅니다.  

## 예제  4-1  숫자와 문자 인덱스 생성

In [2]:
import pandas as pd

In [3]:
pd.Index

pandas.core.indexes.base.Index

In [5]:
# Build an integer index from a plain Python list and display it.
idx1 = pd.Index(data=[1, 2, 3, 4])
idx1

Int64Index([1, 2, 3, 4], dtype='int64')

In [6]:
type(idx1)

pandas.core.indexes.numeric.Int64Index

In [7]:
# A range object is kept as a range-based index; range(1, 4) covers 1..3 only.
idx2 = pd.Index(data=range(1, 4))

In [8]:
type(idx2)

pandas.core.indexes.range.RangeIndex

In [9]:
idx1.dtype, idx2.dtype

(dtype('int64'), dtype('int64'))

In [10]:
idx_s = pd.Index(['a', 'b','c'])

In [11]:
idx_s

Index(['a', 'b', 'c'], dtype='object')

In [12]:
idx_s.values

array(['a', 'b', 'c'], dtype=object)

In [13]:
idx_s.shape

(3,)

In [14]:
# Force a floating-point index even though the input values are integers.
idx_f = pd.Index([1, 2, 3, 4], dtype=float)

In [15]:
idx_f

Float64Index([1.0, 2.0, 3.0, 4.0], dtype='float64')

## 예제  4-2  숫자와 문자  인덱스 특징 

In [16]:
idx1[0]

1

In [17]:
idx_s[:]

Index(['a', 'b', 'c'], dtype='object')

In [18]:
idx1[[0]]

Int64Index([1], dtype='int64')

In [19]:
idx1[idx1 < 3 ]

Int64Index([1, 2], dtype='int64')

In [20]:
# An Index is not a Series/DataFrame: it has no .loc accessor, so the
# attribute lookup itself fails. Catch only that specific error.
try:
    idx1.loc[0]
except AttributeError as e:
    print(e)

'Int64Index' object has no attribute 'loc'


In [21]:
idx4 = pd.Index([4,5,6,7])

In [22]:
# Index objects are immutable: item assignment is rejected with a TypeError.
try:
    idx4[0] = 100
except TypeError as e:
    print(e)

Index does not support mutable operations


In [23]:
idx4 = pd.Index([1,2,3,4])

In [24]:
idx4

Int64Index([1, 2, 3, 4], dtype='int64')

## 예제  4-3  숫자와 문자  인덱스 메소드 처리

In [33]:
idx1

Int64Index([1, 2, 3, 4], dtype='int64')

In [34]:
idx2

RangeIndex(start=1, stop=4, step=1)

In [35]:
idx1.difference(idx2)

Int64Index([4], dtype='int64')

In [36]:
idx1 + 3 

Int64Index([4, 5, 6, 7], dtype='int64')

In [38]:
# Unlike Series, an Index has no add() method; arithmetic is only available
# through operators such as `idx1 + 3`.
try:
    idx1.add(3)
except AttributeError as e:
    print(e)

'Int64Index' object has no attribute 'add'


## 예제  4-4  암묵적 인덱스 변경

In [39]:
import numpy as np

In [40]:
s1 = pd.Series(index=list('aaab'), data=np.arange(4))

In [41]:
s1

a    0
a    1
a    2
b    3
dtype: int32

In [42]:
s2 = pd.Series(index=list('baaa'), data=np.arange(4)) 
s2

b    0
a    1
a    2
a    3
dtype: int32

In [43]:
s1 + s2

a    1
a    2
a    3
a    2
a    3
a    4
a    3
a    4
a    5
b    3
dtype: int32

In [44]:
# Sorting both operands first gives them the same label order (a, a, a, b);
# the result below then has one row per position instead of the 10-row
# cross product of duplicate labels produced by `s1 + s2`.
s1.sort_index().add(s2.sort_index())

a    1
a    3
a    5
b    3
dtype: int32

In [45]:
s3 = pd.Series(index=list('aaab'), data=np.arange(4))

In [46]:
s1 + s3

a    0
a    2
a    4
b    6
dtype: int32

In [47]:
s4 = pd.Series(index=list('aaabc'), data=np.arange(5))

In [48]:
s1 + s4

a    0.0
a    1.0
a    2.0
a    1.0
a    2.0
a    3.0
a    2.0
a    3.0
a    4.0
b    6.0
c    NaN
dtype: float64

##  예제 4-5 날짜 인덱스 생성 및 속성보기

In [49]:
# Wrap a three-day date range in pd.Index to show which index type results.
idx_d = pd.Index(pd.date_range(start='20130101', periods=3))

In [50]:
idx_d

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03'], dtype='datetime64[ns]', freq='D')

In [51]:
idx_d.values

array(['2013-01-01T00:00:00.000000000', '2013-01-02T00:00:00.000000000',
       '2013-01-03T00:00:00.000000000'], dtype='datetime64[ns]')

In [52]:
idx_d.freq

<Day>

In [53]:
# Irregularly spaced dates given as strings; they are parsed into timestamps.
index = pd.DatetimeIndex(
    ['2014-07-04', '2014-08-04', '2015-07-04', '2015-08-04']
)

In [54]:
data = pd.Series([0, 1, 2, 3], index=index)

In [55]:
data

2014-07-04    0
2014-08-04    1
2015-07-04    2
2015-08-04    3
dtype: int64

In [56]:
index

DatetimeIndex(['2014-07-04', '2014-08-04', '2015-07-04', '2015-08-04'], dtype='datetime64[ns]', freq=None)

In [57]:
index.shape

(4,)

In [58]:
index.dtype

dtype('<M8[ns]')

## 예제 4-6 타임에 대한 빈도 이해하기

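#### 시간에 대한 코드 (NumPy datetime64 단위 코드)

        Y : Year, M : Month, W : Week, D : Day, h : Hour, m : Minute, s : Second
        ms : Millisecond, us : Microsecond, ns : Nanosecond, ps : Picosecond, fs : Femtosecond

        판다스의 freq 별칭은 위 코드와 다릅니다. 아래 예제에서는 'H'(시간), 'T'(분), 'D'(일), 'W'(주), 'M'(월)을 사용합니다.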

In [60]:
dr1 = pd.date_range('2018-07-03', '2018-07-10')
dr1

DatetimeIndex(['2018-07-03', '2018-07-04', '2018-07-05', '2018-07-06',
               '2018-07-07', '2018-07-08', '2018-07-09', '2018-07-10'],
              dtype='datetime64[ns]', freq='D')

In [61]:
dr2 = pd.date_range('2018-07-03', periods=8)
dr2

DatetimeIndex(['2018-07-03', '2018-07-04', '2018-07-05', '2018-07-06',
               '2018-07-07', '2018-07-08', '2018-07-09', '2018-07-10'],
              dtype='datetime64[ns]', freq='D')

In [62]:
# 24 hourly timestamps covering one day.
# Use the lower-case 'h' alias: upper-case 'H' is deprecated in recent pandas.
dr3 = pd.date_range('2018-08-03', periods=24, freq='h')
dr3

DatetimeIndex(['2018-08-03 00:00:00', '2018-08-03 01:00:00',
               '2018-08-03 02:00:00', '2018-08-03 03:00:00',
               '2018-08-03 04:00:00', '2018-08-03 05:00:00',
               '2018-08-03 06:00:00', '2018-08-03 07:00:00',
               '2018-08-03 08:00:00', '2018-08-03 09:00:00',
               '2018-08-03 10:00:00', '2018-08-03 11:00:00',
               '2018-08-03 12:00:00', '2018-08-03 13:00:00',
               '2018-08-03 14:00:00', '2018-08-03 15:00:00',
               '2018-08-03 16:00:00', '2018-08-03 17:00:00',
               '2018-08-03 18:00:00', '2018-08-03 19:00:00',
               '2018-08-03 20:00:00', '2018-08-03 21:00:00',
               '2018-08-03 22:00:00', '2018-08-03 23:00:00'],
              dtype='datetime64[ns]', freq='H')

In [63]:
dr3.to_period('D')

PeriodIndex(['2018-08-03', '2018-08-03', '2018-08-03', '2018-08-03',
             '2018-08-03', '2018-08-03', '2018-08-03', '2018-08-03',
             '2018-08-03', '2018-08-03', '2018-08-03', '2018-08-03',
             '2018-08-03', '2018-08-03', '2018-08-03', '2018-08-03',
             '2018-08-03', '2018-08-03', '2018-08-03', '2018-08-03',
             '2018-08-03', '2018-08-03', '2018-08-03', '2018-08-03'],
            dtype='period[D]', freq='D')

In [64]:
dr_m = pd.period_range('2018-01', periods=13, freq='M')

In [65]:
dr_m

PeriodIndex(['2018-01', '2018-02', '2018-03', '2018-04', '2018-05', '2018-06',
             '2018-07', '2018-08', '2018-09', '2018-10', '2018-11', '2018-12',
             '2019-01'],
            dtype='period[M]', freq='M')

In [67]:
# Ten hourly offsets starting at zero.
# Use the lower-case 'h' alias: upper-case 'H' is deprecated in recent pandas.
tm_1 = pd.timedelta_range(0, periods=10, freq='h')
tm_1

TimedeltaIndex(['00:00:00', '01:00:00', '02:00:00', '03:00:00', '04:00:00',
                '05:00:00', '06:00:00', '07:00:00', '08:00:00', '09:00:00'],
               dtype='timedelta64[ns]', freq='H')

In [68]:
tm_1 - tm_1[1]

TimedeltaIndex(['-1 days +23:00:00',          '00:00:00',          '01:00:00',
                         '02:00:00',          '03:00:00',          '04:00:00',
                         '05:00:00',          '06:00:00',          '07:00:00',
                         '08:00:00'],
               dtype='timedelta64[ns]', freq='H')

In [69]:
dr2

DatetimeIndex(['2018-07-03', '2018-07-04', '2018-07-05', '2018-07-06',
               '2018-07-07', '2018-07-08', '2018-07-09', '2018-07-10'],
              dtype='datetime64[ns]', freq='D')

In [70]:
dr2 - dr2[0]

TimedeltaIndex(['0 days', '1 days', '2 days', '3 days', '4 days', '5 days',
                '6 days', '7 days'],
               dtype='timedelta64[ns]', freq=None)

#### 분단위로 인덱스 만들기

In [71]:
# Nine timestamps one minute apart.
# 'min' is the minute alias accepted by old and new pandas; 'T' is deprecated.
index = pd.date_range('1/1/2018', periods=9, freq='min')

In [72]:
series = pd.Series(range(9), index=index)

In [73]:
series

2018-01-01 00:00:00    0
2018-01-01 00:01:00    1
2018-01-01 00:02:00    2
2018-01-01 00:03:00    3
2018-01-01 00:04:00    4
2018-01-01 00:05:00    5
2018-01-01 00:06:00    6
2018-01-01 00:07:00    7
2018-01-01 00:08:00    8
Freq: T, dtype: int64

In [74]:
# Group the per-minute series into 3-minute bins (lazy resampler object).
# '3min' replaces the deprecated '3T' spelling of the same frequency.
s_3T = series.resample('3min')

In [75]:
s_3T

<pandas.core.resample.DatetimeIndexResampler object at 0x000001E3731BE550>

In [76]:
s_3T.sum()

2018-01-01 00:00:00     3
2018-01-01 00:03:00    12
2018-01-01 00:06:00    21
Freq: 3T, dtype: int64

## 예제 4-7 날짜 인덱스 활용하기

In [78]:
# Load the Han River bridge CSV: the 'Date' column becomes the index and is
# parsed as dates; the file is decoded as cp949.
data = pd.read_csv(
    '../data/hanriver_bridge.csv',
    encoding='cp949',
    index_col='Date',
    parse_dates=True,
)
data.head()

Unnamed: 0_level_0,한강 좌측 인도,한강 우측 인도
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2012-10-03 00:00:00,4.0,9.0
2012-10-03 01:00:00,4.0,6.0
2012-10-03 02:00:00,1.0,1.0
2012-10-03 03:00:00,2.0,3.0
2012-10-03 04:00:00,6.0,1.0


In [79]:
data.columns

Index(['한강 좌측 인도', '한강 우측 인도'], dtype='object')

In [80]:
data.index

DatetimeIndex(['2012-10-03 00:00:00', '2012-10-03 01:00:00',
               '2012-10-03 02:00:00', '2012-10-03 03:00:00',
               '2012-10-03 04:00:00', '2012-10-03 05:00:00',
               '2012-10-03 06:00:00', '2012-10-03 07:00:00',
               '2012-10-03 08:00:00', '2012-10-03 09:00:00',
               ...
               '2018-05-31 14:00:00', '2018-05-31 15:00:00',
               '2018-05-31 16:00:00', '2018-05-31 17:00:00',
               '2018-05-31 18:00:00', '2018-05-31 19:00:00',
               '2018-05-31 20:00:00', '2018-05-31 21:00:00',
               '2018-05-31 22:00:00', '2018-05-31 23:00:00'],
              dtype='datetime64[ns]', name='Date', length=49608, freq=None)

In [81]:
data.columns=['좌측','우측']

In [82]:
data['합산'] = data.eval('좌측 + 우측')

In [83]:
data.head()

Unnamed: 0_level_0,좌측,우측,합산
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2012-10-03 00:00:00,4.0,9.0,13.0
2012-10-03 01:00:00,4.0,6.0,10.0
2012-10-03 02:00:00,1.0,1.0,2.0
2012-10-03 03:00:00,2.0,3.0,5.0
2012-10-03 04:00:00,6.0,1.0,7.0


In [84]:
data.isnull().sum()

좌측    8
우측    8
합산    8
dtype: int64

In [85]:
data.shape

(49608, 3)

In [86]:
data_dp = data.dropna()

In [87]:
data_dp.shape

(49600, 3)

In [88]:
# Aggregate the hourly rows into one total per calendar day.
# NOTE(review): this resamples `data`, not the NaN-free `data_dp` created
# above — confirm that is intended.
daily = data.resample('D').sum()

In [89]:
daily.head()

Unnamed: 0_level_0,좌측,우측,합산
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2012-10-03,1760.0,1761.0,3521.0
2012-10-04,1708.0,1767.0,3475.0
2012-10-05,1558.0,1590.0,3148.0
2012-10-06,1080.0,926.0,2006.0
2012-10-07,1191.0,951.0,2142.0


In [90]:
weekly_r = data.resample('W').mean()

In [91]:
weekly_r.head()

Unnamed: 0_level_0,좌측,우측,합산
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2012-10-07,60.808333,58.291667,119.1
2012-10-14,51.660714,48.309524,99.970238
2012-10-21,47.297619,45.017857,92.315476
2012-10-28,41.077381,38.904762,79.982143
2012-11-04,38.142857,34.440476,72.583333


In [92]:
weekly_f = data.asfreq('W')

In [93]:
weekly_f.head()

Unnamed: 0_level_0,좌측,우측,합산
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2012-10-07,6.0,5.0,11.0
2012-10-14,3.0,3.0,6.0
2012-10-21,5.0,12.0,17.0
2012-10-28,5.0,5.0,10.0
2012-11-04,7.0,11.0,18.0


In [94]:
data.loc['2012-10-07'].head()

Unnamed: 0_level_0,좌측,우측,합산
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2012-10-07 00:00:00,6.0,5.0,11.0
2012-10-07 01:00:00,12.0,5.0,17.0
2012-10-07 02:00:00,2.0,1.0,3.0
2012-10-07 03:00:00,4.0,2.0,6.0
2012-10-07 04:00:00,1.0,2.0,3.0


## 예제 4-8 범주형 인덱스 활용하기

In [95]:
# Categorical index whose categories are inferred from the given values.
inx_i = pd.CategoricalIndex(data=[1, 2, 3, 4])

In [96]:
s = pd.Series([1,2,3,4],index=inx_i)  
s

1    1
2    2
3    3
4    4
dtype: int64

In [97]:
s.index

CategoricalIndex([1, 2, 3, 4], categories=[1, 2, 3, 4], ordered=False, dtype='category')

In [98]:
# NOTE: the integer key 3 is resolved by position here, not as the label 3 —
# the output below shows the row labelled 4 receiving the value 100.
s[3] = 100

In [99]:
s

1      1
2      2
3      3
4    100
dtype: int64

In [100]:
# 5 is not one of the index's categories, so this assignment cannot add a
# new row; the message printed below reports an out-of-bounds position.
# NOTE(review): that message looks like an IndexError — confirm before
# narrowing the except clause.
try :
    s[5] = 100
except Exception as e :
    print(e)

index 5 is out of bounds for axis 0 with size 4


In [101]:
s.index = s.index.add_categories(5)

In [102]:
s.index

CategoricalIndex([1, 2, 3, 4], categories=[1, 2, 3, 4, 5], ordered=False, dtype='category')

In [103]:
# insert() returns a new index with one extra element; assigning that
# 5-element index to the 4-row Series is rejected with a ValueError.
try:
    s.index = s.index.insert(4, 5)
except ValueError as e:
    print(e)

Length mismatch: Expected axis has 4 elements, new values have 5 elements


In [104]:
stl = s.index.tolist()

In [105]:
stl

[1, 2, 3, 4]

In [106]:
s.index = stl

In [107]:
s.index

Int64Index([1, 2, 3, 4], dtype='int64')

In [108]:
s[5] = 100

In [109]:
s

1      1
2      2
3      3
4    100
5    100
dtype: int64

In [110]:
s.index = s.index.astype('category')  
s

1      1
2      2
3      3
4    100
5    100
dtype: int64

In [111]:
s.index

CategoricalIndex([1, 2, 3, 4, 5], categories=[1, 2, 3, 4, 5], ordered=False, dtype='category')

## 예제 4-9 멀티인덱스 생성 

In [112]:
import numpy as np

In [113]:
ind = [("서울", 2017), ("경기도",2017)]

In [114]:
si = pd.Series(np.random.randint(1,10,2),index=ind)

In [115]:
si

(서울, 2017)     6
(경기도, 2017)    1
dtype: int32

In [116]:
si.index

Index([('서울', 2017), ('경기도', 2017)], dtype='object')

In [117]:
# The index holds whole tuples as single labels, so the partial key '서울'
# matches nothing and the lookup raises KeyError.
try:
    si['서울']
except KeyError as e:
    print(e)

'서울'


In [118]:
 si['서울',2017]

6

In [119]:
si[('서울',2017)]

6

In [123]:
index = pd.MultiIndex.from_tuples(ind)
index

MultiIndex([( '서울', 2017),
            ('경기도', 2017)],
           )

In [122]:
s = pd.Series(np.random.randint(1,10,2),index=index) 
s

서울   2017    9
경기도  2017    2
dtype: int32

In [124]:
s['서울']

2017    9
dtype: int32

In [125]:
s['서울',2017]

9

In [126]:
s[('서울',2017)]

9

In [127]:
s.loc['서울']

2017    9
dtype: int32

In [128]:
s[:,2017]

서울     9
경기도    2
dtype: int32

In [129]:
s.loc[:,2017]

서울     9
경기도    2
dtype: int32

In [130]:
import numpy as np

In [131]:
# Two parallel label arrays: each outer label appears twice, paired with the
# inner labels 'one' and 'two'.
arrays = [
    np.array([outer for outer in ('bar', 'baz', 'foo', 'qux') for _ in range(2)]),
    np.array(['one', 'two'] * 4),
]

In [132]:
s = pd.Series(np.random.randn(8), index=arrays)  
s

bar  one    0.410914
     two    2.668727
baz  one   -0.850950
     two    3.798383
foo  one    0.468191
     two   -0.623192
qux  one   -0.050007
     two   -1.310242
dtype: float64

In [133]:
s.index

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one'),
            ('baz', 'two'),
            ('foo', 'one'),
            ('foo', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           )

In [134]:
iterables = [['bar', 'baz', 'foo', 'qux'], ['one', 'two']]

In [135]:
arrays2 = pd.MultiIndex.from_product(iterables, names=['first', 'second'])  
arrays2

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one'),
            ('baz', 'two'),
            ('foo', 'one'),
            ('foo', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           names=['first', 'second'])

In [136]:
arrays2.levels

FrozenList([['bar', 'baz', 'foo', 'qux'], ['one', 'two']])

In [137]:
# Integer positions of each entry within its level.
# MultiIndex.labels was renamed to .codes (deprecated in pandas 0.24 and
# removed in 1.0), so use .codes to avoid the warning / AttributeError.
arrays2.codes


FrozenList([[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]])

In [138]:
arrays2.values

array([('bar', 'one'), ('bar', 'two'), ('baz', 'one'), ('baz', 'two'),
       ('foo', 'one'), ('foo', 'two'), ('qux', 'one'), ('qux', 'two')],
      dtype=object)

In [139]:
arrays2.names

FrozenList(['first', 'second'])

In [140]:
s2 = pd.Series(np.random.randn(8), index=arrays2)  
s2

first  second
bar    one       1.325394
       two      -1.798063
baz    one       0.512946
       two       0.659957
foo    one       0.584670
       two       1.713146
qux    one      -1.747784
       two      -0.680334
dtype: float64

In [141]:
# Every (city, year) pair, cities in the order 서울, 부산, 인천.
index = [(city, year) for city in ('서울', '부산', '인천') for year in (2008, 2010)]

In [143]:
mul_index = pd.MultiIndex.from_tuples(index)
mul_index

MultiIndex([('서울', 2008),
            ('서울', 2010),
            ('부산', 2008),
            ('부산', 2010),
            ('인천', 2008),
            ('인천', 2010)],
           )

In [144]:
mul_index.values

array([('서울', 2008), ('서울', 2010), ('부산', 2008), ('부산', 2010),
       ('인천', 2008), ('인천', 2010)], dtype=object)

In [145]:
mul_index.value_counts()

(서울, 2010)    1
(인천, 2010)    1
(서울, 2008)    1
(인천, 2008)    1
(부산, 2010)    1
(부산, 2008)    1
dtype: int64

In [146]:
mul_index.levels

FrozenList([['부산', '서울', '인천'], [2008, 2010]])

In [147]:
# Integer positions of each entry within its (sorted) level.
# MultiIndex.labels was renamed to .codes (deprecated in pandas 0.24 and
# removed in 1.0), so use .codes to avoid the warning / AttributeError.
mul_index.codes


FrozenList([[1, 1, 0, 0, 2, 2], [0, 1, 0, 1, 0, 1]])

## 예제 4-10 멀티인덱스를 이용한 시리즈 활용 

In [149]:
populations = [ 30000,37000, 18970, 19370, 20850, 25140]
populations

[30000, 37000, 18970, 19370, 20850, 25140]

In [150]:
pop = pd.Series(populations, index=mul_index)  
pop

서울  2008    30000
    2010    37000
부산  2008    18970
    2010    19370
인천  2008    20850
    2010    25140
dtype: int64

In [151]:
pop['서울']

2008    30000
2010    37000
dtype: int64

In [152]:
pop['서울',2008]

30000

In [153]:
pop[:, 2010]

서울    37000
부산    19370
인천    25140
dtype: int64

In [154]:
# Label slicing needs a lexsorted MultiIndex; on the unsorted index pandas
# raises UnsortedIndexError, which is a KeyError subclass.
try:
    pop["서울" : "인천"]
except KeyError as e:
    print(e)

'Key length (1) was greater than MultiIndex lexsort depth (0)'


In [155]:
pop = pop.sort_index()

In [156]:
pop["서울" : "인천"]

서울  2008    30000
    2010    37000
인천  2008    20850
    2010    25140
dtype: int64

In [159]:
pop.index.names = ['시','년도']
pop

시   년도  
부산  2008    18970
    2010    19370
서울  2008    30000
    2010    37000
인천  2008    20850
    2010    25140
dtype: int64

## 예제 4-11 멀티인덱스를 이용한 데이터프레임  활용 

In [160]:
# Row index: every (year, assignment number) combination.
r_inx = pd.MultiIndex.from_product(
    iterables=[[2017, 2018], [1, 2]],
    names=['년도', '과제점수'],
)

In [161]:
r_inx

MultiIndex([(2017, 1),
            (2017, 2),
            (2018, 1),
            (2018, 2)],
           names=['년도', '과제점수'])

In [162]:
# Column index: every (student, department) combination, in the given order.
c_inx = pd.MultiIndex.from_product(
    iterables=[['철수', '영희', '지원'], ['컴공', '경제']],
    names=['학생', '학과'],
)
c_inx

MultiIndex([('철수', '컴공'),
            ('철수', '경제'),
            ('영희', '컴공'),
            ('영희', '경제'),
            ('지원', '컴공'),
            ('지원', '경제')],
           names=['학생', '학과'])

In [163]:
import numpy as np

In [164]:
# 4x6 table of non-negative random values rounded to one decimal place.
# The generator is not seeded, so the numbers differ on every run.
data = np.abs(np.random.randn(4, 6)).round(1)
data

array([[0.9, 0.8, 0.7, 0.9, 0.3, 0.1],
       [0.5, 0.9, 0.7, 0.6, 1. , 0. ],
       [0.4, 2.1, 0.8, 0.1, 0.7, 0.7],
       [1.3, 0.1, 0. , 0.4, 0.9, 1.1]])

In [165]:
study_data = pd.DataFrame(data, index=r_inx, columns=c_inx) 
study_data

Unnamed: 0_level_0,학생,철수,철수,영희,영희,지원,지원
Unnamed: 0_level_1,학과,컴공,경제,컴공,경제,컴공,경제
년도,과제점수,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
2017,1,0.9,0.8,0.7,0.9,0.3,0.1
2017,2,0.5,0.9,0.7,0.6,1.0,0.0
2018,1,0.4,2.1,0.8,0.1,0.7,0.7
2018,2,1.3,0.1,0.0,0.4,0.9,1.1


In [166]:
study_data.index

MultiIndex([(2017, 1),
            (2017, 2),
            (2018, 1),
            (2018, 2)],
           names=['년도', '과제점수'])

In [167]:
study_data.columns

MultiIndex([('철수', '컴공'),
            ('철수', '경제'),
            ('영희', '컴공'),
            ('영희', '경제'),
            ('지원', '컴공'),
            ('지원', '경제')],
           names=['학생', '학과'])

In [168]:
study_data.index[0]

(2017, 1)

In [169]:
study_data.index[1]

(2017, 2)

In [170]:
study_data.index.names[0]

'년도'

In [171]:
study_data.index.names[1]

'과제점수'

In [172]:
study_data.values

array([[0.9, 0.8, 0.7, 0.9, 0.3, 0.1],
       [0.5, 0.9, 0.7, 0.6, 1. , 0. ],
       [0.4, 2.1, 0.8, 0.1, 0.7, 0.7],
       [1.3, 0.1, 0. , 0.4, 0.9, 1.1]])

In [173]:
study_data['지원']

Unnamed: 0_level_0,학과,컴공,경제
년도,과제점수,Unnamed: 2_level_1,Unnamed: 3_level_1
2017,1,0.3,0.1
2017,2,1.0,0.0
2018,1,0.7,0.7
2018,2,0.9,1.1


In [174]:
study_data['지원','컴공']

년도    과제점수
2017  1       0.3
      2       1.0
2018  1       0.7
      2       0.9
Name: (지원, 컴공), dtype: float64

In [175]:
study_data.loc[:, ('지원','컴공')]

년도    과제점수
2017  1       0.3
      2       1.0
2018  1       0.7
      2       0.9
Name: (지원, 컴공), dtype: float64

### 멀티 인덱스를 사용할 경우 pd.IndexSlice로 슬라이스를 만들어서 처리

       . .loc[행, 열] 형식으로 조회하므로 행과 열 각각이 멀티 인덱스로 구성됨
       . 멀티 인덱스의 특정 레벨 정보만 가져오려면 레벨별 슬라이스로 처리가 필요

In [178]:
# The column MultiIndex is not lexsorted yet, so the label slice
# '철수':'영희' fails with UnsortedIndexError, which is a KeyError subclass.
try:
    study_data.loc[:2018, '철수':'영희']
except KeyError as e:
    print(e)

'Key length (1) was greater than MultiIndex lexsort depth (0)'


In [180]:
study_data = study_data.sort_index(axis=1)  
study_data

Unnamed: 0_level_0,학생,영희,영희,지원,지원,철수,철수
Unnamed: 0_level_1,학과,경제,컴공,경제,컴공,경제,컴공
년도,과제점수,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
2017,1,0.9,0.7,0.1,0.3,0.8,0.9
2017,2,0.6,0.7,0.0,1.0,0.9,0.5
2018,1,0.1,0.8,0.7,0.7,2.1,0.4
2018,2,0.4,0.0,1.1,0.9,0.1,1.3


In [181]:
# Label slicing over the column MultiIndex works once the columns are sorted.
# study_data was already reassigned to its column-sorted form above, so this
# extra sort_index(axis=1) leaves the column order unchanged.
study_data.sort_index(axis=1).loc[:2018, '영희':'지원']

Unnamed: 0_level_0,학생,영희,영희,지원,지원
Unnamed: 0_level_1,학과,경제,컴공,경제,컴공
년도,과제점수,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2017,1,0.9,0.7,0.1,0.3
2017,2,0.6,0.7,0.0,1.0
2018,1,0.1,0.8,0.7,0.7
2018,2,0.4,0.0,1.1,0.9


In [182]:
study_data.loc[pd.IndexSlice[:,1], pd.IndexSlice[:, '컴공']]

Unnamed: 0_level_0,학생,영희,지원,철수
Unnamed: 0_level_1,학과,컴공,컴공,컴공
년도,과제점수,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
2017,1,0.7,0.3,0.9
2018,1,0.8,0.7,0.4


In [183]:
study_data.index

MultiIndex([(2017, 1),
            (2017, 2),
            (2018, 1),
            (2018, 2)],
           names=['년도', '과제점수'])

In [184]:
study_data.index.levels[0].dtype

dtype('int64')

In [185]:
study_data.index.levels[1].dtype

dtype('int64')

In [186]:
study_data.xs((2017,1))

학생  학과
영희  경제    0.9
    컴공    0.7
지원  경제    0.1
    컴공    0.3
철수  경제    0.8
    컴공    0.9
Name: (2017, 1), dtype: float64

In [187]:
study_data.xs(2017)

학생,영희,영희,지원,지원,철수,철수
학과,경제,컴공,경제,컴공,경제,컴공
과제점수,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
1,0.9,0.7,0.1,0.3,0.8,0.9
2,0.6,0.7,0.0,1.0,0.9,0.5


In [188]:
study_data.columns.levels[0].dtype

dtype('O')

In [189]:
study_data.columns.levels[1].dtype

dtype('O')

In [190]:
study_data.xs(('지원','컴공'), axis=1)

년도    과제점수
2017  1       0.3
      2       1.0
2018  1       0.7
      2       0.9
Name: (지원, 컴공), dtype: float64