## 6.5. Index

In [1]:
import pandas as pd
import numpy  as np

idx1 = pd.Index(np.arange(10))
idx1

Int64Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype='int64')

In [2]:
# 인덱싱 접근
idx1[2]

2

In [3]:
# 슬라이싱
idx1[::3]

Int64Index([0, 3, 6, 9], dtype='int64')

In [4]:
# 마스킹
idx1[ idx1 < 5]

Int64Index([0, 1, 2, 3, 4], dtype='int64')

In [5]:
# 모양새
idx1.shape

(10,)

In [6]:
# 인덱스 이름 변경
idx1.name = 'idx1'

In [7]:
idx1[0] = 4

TypeError: Index does not support mutable operations

## 6.5.1. 멀티(계층적) 인덱스

In [1]:
import pandas as pd
import numpy as np

pd.set_option('display.notebook_repr_html', False)

activities_val = {'20230102': [80, 70, 75], '20230103': [90,75,82], '20230104':[95, 72, 89]}
df = pd.DataFrame(data=activities_val, index=['Adams', 'Moses', 'Miriam'])
df

        20230102  20230103  20230104
Adams         80        90        95
Moses         70        75        72
Miriam        75        82        89

In [2]:
multi_idx_df = pd.DataFrame(np.random.randint(50, 100, 18).reshape(9,2),
                     index=[['Adams','Adams','Adams', 'Moses', 'Moses',
                             'Moses','Miriam','Miriam','Miriam'],
              ['20230102','20230103','20230104','20230102','20230103',
              '20230104','20230102','20230103','20230104']],
              columns=['Act', 'Breath'])
multi_idx_df

                 Act  Breath
Adams  20230102   63      56
       20230103   50      74
       20230104   74      82
Moses  20230102   80      66
       20230103   93      56
       20230104   65      75
Miriam 20230102   99      87
       20230103   66      63
       20230104   93      87

In [3]:
multi_idx_df.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 9 entries, ('Adams', '20230102') to ('Miriam', '20230104')
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   Act     9 non-null      int32
 1   Breath  9 non-null      int32
dtypes: int32(2)
memory usage: 386.0+ bytes


In [4]:
idx = pd.MultiIndex.from_product([['Adams','Moses', 'Miriam'],
                                         ['20230102','20230103','20230104']])
multi_idx_product_df = pd.DataFrame(np.random.randint(50, 100, 18).reshape(9,2),
                 index=idx, columns=['Act', 'Breath'])
multi_idx_product_df


                 Act  Breath
Adams  20230102   52      86
       20230103   82      88
       20230104   66      58
Moses  20230102   53      76
       20230103   65      63
       20230104   70      69
Miriam 20230102   77      82
       20230103   88      70
       20230104   56      96

In [5]:
#병동 호실
#환자명
#일자
#활동지수
tuple_index = \
   [('Adams', '20230102'), ('Adams', '20230103'), ('Adams', '20230104'),
   ('Moses', '20230102'),('Moses', '20230103'),('Moses', '20230104'),
   ('Miriam', '20230102'),('Miriam', '20230103'),('Miriam', '2023010')
   ]

multi_index = pd.MultiIndex.from_tuples(tuple_index)
multi_index.names = ['FirstName','Date'] # 인덱스 이름 변경
multi_idx_product_df = pd.DataFrame(data=np.random.randint(50, 100, 18).reshape(9,2),
                index=multi_index, columns=['Act', 'Breath'])
multi_idx_product_df

                    Act  Breath
FirstName Date                 
Adams     20230102   63      67
          20230103   96      80
          20230104   80      78
Moses     20230102   74      93
          20230103   79      74
          20230104   75      88
Miriam    20230102   80      77
          20230103   96      69
          2023010    59      74

In [6]:
multi_idx_product_df.sort_index(inplace=True)
multi_idx_product_df

                    Act  Breath
FirstName Date                 
Adams     20230102   63      67
          20230103   96      80
          20230104   80      78
Miriam    2023010    59      74
          20230102   80      77
          20230103   96      69
Moses     20230102   74      93
          20230103   79      74
          20230104   75      88

### 6.5.2. 인덱스로 데이터 접근하기

In [7]:
# 암묵적 인덱스로 접근(3행까지, 1열까지)
multi_idx_product_df.iloc[:3, :1]

                    Act
FirstName Date         
Adams     20230102   63
          20230103   96
          20230104   80

In [8]:
# 명시적 인덱스로 접근
multi_idx_product_df.loc['Adams',:]

          Act  Breath
Date                 
20230102   63      67
20230103   96      80
20230104   80      78

In [9]:
multi_idx_product_df.loc['Adams','20230103']

Act       96
Breath    80
Name: (Adams, 20230103), dtype: int32

In [10]:
# 팬시인덱싱도 가능함
multi_idx_product_df.loc[['Adams','Miriam']]

                    Act  Breath
FirstName Date                 
Adams     20230102   63      67
          20230103   96      80
          20230104   80      78
Miriam    2023010    59      74
          20230102   80      77
          20230103   96      69

In [11]:
multi_idx_df.loc[:, '20230103']

KeyError: '20230103'

In [12]:
multi_idx_df.loc[(slice(None), '20230102'), :]

                 Act  Breath
Adams  20230102   63      56
Moses  20230102   80      66
Miriam 20230102   99      87

In [13]:
#pd.IndexSlice으로 레벨1조회
idx = pd.IndexSlice
multi_idx_product_df.loc[ idx[:, '20230102'], : ]


                    Act  Breath
FirstName Date                 
Adams     20230102   63      67
Miriam    20230102   80      77
Moses     20230102   74      93

In [14]:
# query메서드로 레벨1 조회
multi_idx_product_df.query("Date == '20230102'")

                    Act  Breath
FirstName Date                 
Adams     20230102   63      67
Miriam    20230102   80      77
Moses     20230102   74      93

### xs() 메서드

In [15]:
# 20230103 일자 모든 환자의 '활동지수' 및 '호흡지수' 조회
multi_idx_product_df.xs('20230103', level='Date')

           Act  Breath
FirstName             
Adams       96      80
Miriam      96      69
Moses       79      74

In [16]:
multi_idx_product_df.xs('20230103', level=1, drop_level=False)

                    Act  Breath
FirstName Date                 
Adams     20230103   96      80
Miriam    20230103   96      69
Moses     20230103   79      74

In [17]:
multi_idx_product_df.xs('Adams', level=0, drop_level=False)

                    Act  Breath
FirstName Date                 
Adams     20230102   63      67
          20230103   96      80
          20230104   80      78

In [18]:
multi_idx_product_df.xs(('Moses','20230102'), drop_level=False)

Act       74
Breath    93
Name: (Moses, 20230102), dtype: int32

### 6.5.3 인덱스 설정 및 해제

In [19]:
multi_idx_product_df.unstack(level=1)

              Act                             Breath                    \
Date      2023010 20230102 20230103 20230104 2023010 20230102 20230103   
FirstName                                                                
Adams         NaN     63.0     96.0     80.0     NaN     67.0     80.0   
Miriam       59.0     80.0     96.0      NaN    74.0     77.0     69.0   
Moses         NaN     74.0     79.0     75.0     NaN     93.0     74.0   

                    
Date      20230104  
FirstName           
Adams         78.0  
Miriam         NaN  
Moses         88.0  

In [20]:
multi_idx_product_df.unstack(level=0)

            Act              Breath             
FirstName Adams Miriam Moses  Adams Miriam Moses
Date                                            
2023010     NaN   59.0   NaN    NaN   74.0   NaN
20230102   63.0   80.0  74.0   67.0   77.0  93.0
20230103   96.0   96.0  79.0   80.0   69.0  74.0
20230104   80.0    NaN  75.0   78.0    NaN  88.0

In [21]:
df2 = multi_idx_product_df.unstack(level=0)
df2


            Act              Breath             
FirstName Adams Miriam Moses  Adams Miriam Moses
Date                                            
2023010     NaN   59.0   NaN    NaN   74.0   NaN
20230102   63.0   80.0  74.0   67.0   77.0  93.0
20230103   96.0   96.0  79.0   80.0   69.0  74.0
20230104   80.0    NaN  75.0   78.0    NaN  88.0

In [22]:
df2.index

Index(['2023010', '20230102', '20230103', '20230104'], dtype='object', name='Date')

In [23]:
df2.loc['20230102']

        FirstName
Act     Adams        63.0
        Miriam       80.0
        Moses        74.0
Breath  Adams        67.0
        Miriam       77.0
        Moses        93.0
Name: 20230102, dtype: float64

In [24]:
no_index_df = multi_idx_product_df.reset_index()
no_index_df


  FirstName      Date  Act  Breath
0     Adams  20230102   63      67
1     Adams  20230103   96      80
2     Adams  20230104   80      78
3    Miriam   2023010   59      74
4    Miriam  20230102   80      77
5    Miriam  20230103   96      69
6     Moses  20230102   74      93
7     Moses  20230103   79      74
8     Moses  20230104   75      88

In [25]:
no_index_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9 entries, 0 to 8
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   FirstName  9 non-null      object
 1   Date       9 non-null      object
 2   Act        9 non-null      int32 
 3   Breath     9 non-null      int32 
dtypes: int32(2), object(2)
memory usage: 344.0+ bytes


In [26]:
set_index_df = no_index_df.set_index(['FirstName', 'Date'])
set_index_df.info()


<class 'pandas.core.frame.DataFrame'>
MultiIndex: 9 entries, ('Adams', '20230102') to ('Moses', '20230104')
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   Act     9 non-null      int32
 1   Breath  9 non-null      int32
dtypes: int32(2)
memory usage: 537.0+ bytes


In [27]:
idx = ['Seoul','Pusan', 'Jeju']
df = pd.DataFrame(np.random.randint(50, 100, 3).reshape(3,1),
              index=idx)
df

        0
Seoul  66
Pusan  92
Jeju   93

In [28]:
df.reindex(['Daejon', 'Seoul'], method='ffill')

         0
Daejon  93
Seoul   66

In [29]:
changed_level = set_index_df.swaplevel('FirstName', 'Date')
changed_level


                    Act  Breath
Date     FirstName             
20230102 Adams       63      67
20230103 Adams       96      80
20230104 Adams       80      78
2023010  Miriam      59      74
20230102 Miriam      80      77
20230103 Miriam      96      69
20230102 Moses       74      93
20230103 Moses       79      74
20230104 Moses       75      88

In [30]:
changed_level.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 9 entries, ('20230102', 'Adams') to ('20230104', 'Moses')
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   Act     9 non-null      int32
 1   Breath  9 non-null      int32
dtypes: int32(2)
memory usage: 537.0+ bytes


### 6.5.4 간단한 집계 및 통계

In [39]:
multi_idx_product_df.mean(level='FirstName')

  multi_idx_product_df.mean(level='FirstName')


                 Act     Breath
FirstName                      
Adams      79.666667  75.000000
Miriam     78.333333  73.333333
Moses      76.000000  85.000000

In [40]:
multi_idx_product_df.sum(level='FirstName')

  multi_idx_product_df.sum(level='FirstName')


           Act  Breath
FirstName             
Adams      239     225
Miriam     235     220
Moses      228     255

In [41]:
multi_idx_product_df.min(level='Date')

  multi_idx_product_df.min(level='Date')


          Act  Breath
Date                 
20230102   63      67
20230103   79      69
20230104   75      78
2023010    59      74

In [42]:
multi_idx_product_df.groupby(level='FirstName').sum()

           Act  Breath
FirstName             
Adams      239     225
Miriam     235     220
Moses      228     255

In [43]:
import pandas as pd
import numpy as np
activities_val = {'20230102': [80, 70, 75], '20230103': [90,75,82], '20230104':[95, 72, 89]}
df = pd.DataFrame(data=activities_val, index=['Adams', 'Moses', 'Miriam'])
df.index.names = ['FirstName']
df

           20230102  20230103  20230104
FirstName                              
Adams            80        90        95
Moses            70        75        72
Miriam           75        82        89

In [44]:
# MultiIndex가 아니기에 level 옵션이 적용되지 않음
df.min(level="FirstName")


  df.min(level="FirstName")


           20230102  20230103  20230104
FirstName                              
Adams            80        90        95
Moses            70        75        72
Miriam           75        82        89

In [45]:
df.mean(axis=1)

FirstName
Adams     88.333333
Moses     72.333333
Miriam    82.000000
dtype: float64

In [46]:
df.mean(axis=0)

20230102    75.000000
20230103    82.333333
20230104    85.333333
dtype: float64