# 기본 RangeIndex로 데이터프레임만들어보기

In [82]:
import pandas as pd
import numpy as np

pd.set_option('display.notebook_repr_html', False)

activities_val = {'20230102': [80, 70, 75], '20230103': [90,75,82], '20230104':[95, 72, 89]}
df = pd.DataFrame(data=activities_val, index=['Adams', 'Moses', 'Miriam'])
df.index.names = ['FirstName']
df

           20230102  20230103  20230104
FirstName                              
Adams            80        90        95
Moses            70        75        72
Miriam           75        82        89

In [83]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, Adams to Miriam
Data columns (total 3 columns):
 #   Column    Non-Null Count  Dtype
---  ------    --------------  -----
 0   20230102  3 non-null      int64
 1   20230103  3 non-null      int64
 2   20230104  3 non-null      int64
dtypes: int64(3)
memory usage: 96.0+ bytes


In [84]:
df.index

Index(['Adams', 'Moses', 'Miriam'], dtype='object', name='FirstName')

In [85]:
df.index

Index(['Adams', 'Moses', 'Miriam'], dtype='object', name='FirstName')

In [86]:
df.mean(axis=1)

FirstName
Adams     88.333333
Moses     72.333333
Miriam    82.000000
dtype: float64

In [87]:
# MultiIndex가 아니기에 level 옵션이 적용되지 않음
df.min(level="FirstName")

  df.min(level="FirstName")


           20230102  20230103  20230104
FirstName                              
Adams            80        90        95
Moses            70        75        72
Miriam           75        82        89

In [88]:
# 환자 Adams 모든 데이터
df.loc['Adams']

20230102    80
20230103    90
20230104    95
Name: Adams, dtype: int64

In [89]:
# 환자 Adams의 20230103일자 데이터만
df.loc['Adams']['20230103']

90

In [90]:
# 모든 환자의 20230103일자 데이터만
df.loc[:, '20230103']

FirstName
Adams     90
Moses     75
Miriam    82
Name: 20230103, dtype: int64

In [91]:
df.loc['Adams':"Moses"]

           20230102  20230103  20230104
FirstName                              
Adams            80        90        95
Moses            70        75        72

## 싱글 인덱스 만들기

In [92]:

import pandas as pd


# single_index = [('Japan', 'Tokyo'), ('Japan', 'Osaka'), ('Japan', 'Yokohama'),
#                 ('Korea', 'Seoul '), ('Korea', 'Busan'), ('Korea', 'Incheon ')]

# people_cnt = [14_000_000, 19_000_000, 3_700_000, 
#               9_700_000, 3_400_000, 3_000_000]



#병동 호실
#환자명
#일자
#활동지수

tuple_index = \
  [('Adams', '20230102'), ('Adams', '20230103'), ('Adams', '20230104'),
  ('Moses', '20230102'),('Moses', '20230103'),('Moses', '20230104'),
  ('Miriam', '20230102'),('Miriam', '20230103'),('Miriam', '2023010')
  ]

activities = [80, 90, 95,
  70,75, 72,
  65, 82, 89 ]

act_score = pd.Series(activities, index=tuple_index)
act_score

(Adams, 20230102)     80
(Adams, 20230103)     90
(Adams, 20230104)     95
(Moses, 20230102)     70
(Moses, 20230103)     75
(Moses, 20230104)     72
(Miriam, 20230102)    65
(Miriam, 20230103)    82
(Miriam, 2023010)     89
dtype: int64

In [93]:
act_score[('Adams','20230102')]

80

In [94]:
##### act_score.loc['Adams']

In [95]:
act_score[('Adams','20230102'):('Adams','20230104')]

(Adams, 20230102)    80
(Adams, 20230103)    90
(Adams, 20230104)    95
dtype: int64

In [96]:
# 슬라이싱 조회 실패
#### act_score[('Adams',':')]

In [97]:
act_score.index

Index([ ('Adams', '20230102'),  ('Adams', '20230103'),  ('Adams', '20230104'),
        ('Moses', '20230102'),  ('Moses', '20230103'),  ('Moses', '20230104'),
       ('Miriam', '20230102'), ('Miriam', '20230103'),  ('Miriam', '2023010')],
      dtype='object')

## MultiIndex 만들기

In [98]:
multi_index = pd.MultiIndex.from_tuples(tuple_index)
multi_index

MultiIndex([( 'Adams', '20230102'),
            ( 'Adams', '20230103'),
            ( 'Adams', '20230104'),
            ( 'Moses', '20230102'),
            ( 'Moses', '20230103'),
            ( 'Moses', '20230104'),
            ('Miriam', '20230102'),
            ('Miriam', '20230103'),
            ('Miriam',  '2023010')],
           )

In [99]:
# 다시 시리즈 데이터를 만들어보자
multi_act_score = pd.Series(activities, index=multi_index)
multi_act_score

Adams   20230102    80
        20230103    90
        20230104    95
Moses   20230102    70
        20230103    75
        20230104    72
Miriam  20230102    65
        20230103    82
        2023010     89
dtype: int64

In [100]:
# 인덱싱 조회
multi_act_score['Adams']

20230102    80
20230103    90
20230104    95
dtype: int64

In [101]:
# 하위 인덱싱
multi_act_score['Adams']['20230102']

80

In [102]:
# 슬라이싱
multi_act_score[:, '20230103']

Adams     90
Moses     75
Miriam    82
dtype: int64

In [103]:
# fancy indexing
multi_act_score[['Adams','Miriam']]

Adams   20230102    80
        20230103    90
        20230104    95
Miriam  20230102    65
        20230103    82
        2023010     89
dtype: int64

In [104]:
# 인덱스에 이름 부여
multi_index.names = ['FirstName','Date']
multi_index

MultiIndex([( 'Adams', '20230102'),
            ( 'Adams', '20230103'),
            ( 'Adams', '20230104'),
            ( 'Moses', '20230102'),
            ( 'Moses', '20230103'),
            ( 'Moses', '20230104'),
            ('Miriam', '20230102'),
            ('Miriam', '20230103'),
            ('Miriam',  '2023010')],
           names=['FirstName', 'Date'])

In [105]:
multi_act_score

FirstName  Date    
Adams      20230102    80
           20230103    90
           20230104    95
Moses      20230102    70
           20230103    75
           20230104    72
Miriam     20230102    65
           20230103    82
           2023010     89
dtype: int64

In [106]:
multi_act_score.sort_index()

FirstName  Date    
Adams      20230102    80
           20230103    90
           20230104    95
Miriam     2023010     89
           20230102    65
           20230103    82
Moses      20230102    70
           20230103    75
           20230104    72
dtype: int64

In [107]:
#부울 마스크
multi_act_score[ multi_act_score > 85 ]

FirstName  Date    
Adams      20230103    90
           20230104    95
Miriam     2023010     89
dtype: int64

In [108]:
multi_act_score.iloc[:4]

FirstName  Date    
Adams      20230102    80
           20230103    90
           20230104    95
Moses      20230102    70
dtype: int64

## Multiindex, DataFrame 만들기

In [149]:
import numpy as np
import pandas as pd

tuple_index = \
  [('Adams', '20230102'), ('Adams', '20230103'), ('Adams', '20230104'),
  ('Moses', '20230102'),('Moses', '20230103'),('Moses', '20230104'),
  ('Miriam', '20230102'),('Miriam', '20230103'),('Miriam', '2023010')
  ]
multi_index = pd.MultiIndex.from_tuples(tuple_index)
multi_index.names = ['FirstName','Date']

activities_val = np.array([80, 90, 95, 70,75, 72, 65, 82, 89])

multi_df = pd.DataFrame(data=activities_val, index=multi_index)
multi_df

                     0
FirstName Date        
Adams     20230102  80
          20230103  90
          20230104  95
Moses     20230102  70
          20230103  75
          20230104  72
Miriam    20230102  65
          20230103  82
          2023010   89

        0
Adams NaN
Moses NaN

In [123]:
# 인덱스 정렬
multi_df.sort_index(inplace=True)
multi_df

                     0
FirstName Date        
Adams     20230102  80
          20230103  90
          20230104  95
Miriam    2023010   89
          20230102  65
          20230103  82
Moses     20230102  70
          20230103  75
          20230104  72

In [124]:
multi_df.loc['Adams']

           0
Date        
20230102  80
20230103  90
20230104  95

In [125]:
# Date 인덱스를 열로 변형
multi_df.unstack(level=1)

                0                           
Date      2023010 20230102 20230103 20230104
FirstName                                   
Adams         NaN     80.0     90.0     95.0
Miriam       89.0     65.0     82.0      NaN
Moses         NaN     70.0     75.0     72.0

In [126]:
# FirstName 인덱스를 열로 변형
multi_df.unstack(level=0)

              0             
FirstName Adams Miriam Moses
Date                        
2023010     NaN   89.0   NaN
20230102   80.0   65.0  70.0
20230103   90.0   82.0  75.0
20230104   95.0    NaN  72.0

In [127]:
df2= multi_df.unstack(level=0)
df2

              0             
FirstName Adams Miriam Moses
Date                        
2023010     NaN   89.0   NaN
20230102   80.0   65.0  70.0
20230103   90.0   82.0  75.0
20230104   95.0    NaN  72.0

In [128]:
df2.index

Index(['2023010', '20230102', '20230103', '20230104'], dtype='object', name='Date')

In [129]:
df2.loc['20230102']

   FirstName
0  Adams        80.0
   Miriam       65.0
   Moses        70.0
Name: 20230102, dtype: float64

In [130]:
no_index_df = multi_df.reset_index()
no_index_df

  FirstName      Date   0
0     Adams  20230102  80
1     Adams  20230103  90
2     Adams  20230104  95
3    Miriam   2023010  89
4    Miriam  20230102  65
5    Miriam  20230103  82
6     Moses  20230102  70
7     Moses  20230103  75
8     Moses  20230104  72

In [131]:
no_index_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9 entries, 0 to 8
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   FirstName  9 non-null      object
 1   Date       9 non-null      object
 2   0          9 non-null      int32 
dtypes: int32(1), object(2)
memory usage: 308.0+ bytes


In [132]:
set_index_df = no_index_df.set_index(['FirstName', 'Date'])

In [133]:
set_index_df.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 9 entries, ('Adams', '20230102') to ('Moses', '20230104')
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   0       9 non-null      int32
dtypes: int32(1)
memory usage: 501.0+ bytes


In [135]:
set_index_df

                     0
FirstName Date        
Adams     20230102  80
          20230103  90
          20230104  95
Miriam    2023010   89
          20230102  65
          20230103  82
Moses     20230102  70
          20230103  75
          20230104  72

In [134]:
multi_act_score.swaplevel('FirstName', 'Date')

Date      FirstName
20230102  Adams        80
20230103  Adams        90
20230104  Adams        95
20230102  Moses        70
20230103  Moses        75
20230104  Moses        72
20230102  Miriam       65
20230103  Miriam       82
2023010   Miriam       89
dtype: int64

## 간단한 집계 구하기
### sum, mean, max, min, median