In [14]:
import numpy as np 
import pandas as pd 
import seaborn as sns 

# Load the 'titanic' example dataset through seaborn's dataset loader.
titanic = sns.load_dataset(name='titanic')
# Print the frame itself first, then the summary produced by DataFrame.info().
print(titanic)
titanic.info()

survived  pclass     sex   age  sibsp  parch     fare embarked   class  \
0           0       3    male  22.0      1      0   7.2500        S   Third   
1           1       1  female  38.0      1      0  71.2833        C   First   
2           1       3  female  26.0      0      0   7.9250        S   Third   
3           1       1  female  35.0      1      0  53.1000        S   First   
4           0       3    male  35.0      0      0   8.0500        S   Third   
..        ...     ...     ...   ...    ...    ...      ...      ...     ...   
886         0       2    male  27.0      0      0  13.0000        S  Second   
887         1       1  female  19.0      0      0  30.0000        S   First   
888         0       3  female   NaN      1      2  23.4500        S   Third   
889         1       1    male  26.0      0      0  30.0000        C   First   
890         0       3    male  32.0      0      0   7.7500        Q   Third   

       who  adult_male deck  embark_town alive  alone  


In [15]:
# Build a new DataFrame holding only the age, sex, class, fare and survived columns.
# Method 1: index the frame directly with a list of column labels.
wanted_columns = ['age', 'sex', 'class', 'fare', 'survived']
df = titanic[wanted_columns]
print(df)

age     sex   class     fare  survived
0    22.0    male   Third   7.2500         0
1    38.0  female   First  71.2833         1
2    26.0  female   Third   7.9250         1
3    35.0  female   First  53.1000         1
4    35.0    male   Third   8.0500         0
..    ...     ...     ...      ...       ...
886  27.0    male  Second  13.0000         0
887  19.0  female   First  30.0000         1
888   NaN  female   Third  23.4500         0
889  26.0    male   First  30.0000         1
890  32.0    male   Third   7.7500         0

[891 rows x 5 columns]


In [16]:
# Method 2: label-based selection with .loc -- keep every row (`:`) and
# only the listed columns.
df = titanic.loc[
    :,
    ['age', 'sex', 'class', 'fare', 'survived'],
]
print(df)

age     sex   class     fare  survived
0    22.0    male   Third   7.2500         0
1    38.0  female   First  71.2833         1
2    26.0  female   Third   7.9250         1
3    35.0  female   First  53.1000         1
4    35.0    male   Third   8.0500         0
..    ...     ...     ...      ...       ...
886  27.0    male  Second  13.0000         0
887  19.0  female   First  30.0000         1
888   NaN  female   Third  23.4500         0
889  26.0    male   First  30.0000         1
890  32.0    male   Third   7.7500         0

[891 rows x 5 columns]


In [17]:
# Pivot 1: mean age with passenger class as rows and sex as columns.
#
# `observed=False` is passed explicitly. When a grouping key is categorical
# (seaborn's `class` column is expected to be -- confirm with titanic.info()),
# recent pandas versions emit a FutureWarning if `observed` is omitted, and the
# default is scheduled to change to True. Stating it keeps today's behaviour
# and output regardless of the installed pandas version.
pivot1 = pd.pivot_table(df, 
                        values=['age'], 
                        index=['class'], 
                        columns=['sex'], 
                        aggfunc='mean',
                        observed=False)
print(pivot1,'\n')

# Pivot 2: same layout, but two aggregations at once. Passing a list to
# `aggfunc` adds the function name ('mean', 'sum') as the outermost column level.
pivot2 = pd.pivot_table(df, 
                        values=['age'], 
                        index=['class'], 
                        columns=['sex'], 
                        aggfunc=['mean','sum'],
                        observed=False)
print(pivot2)

age           
sex        female       male
class                       
First   34.611765  41.281386
Second  28.722973  30.740707
Third   21.750000  26.507589 

             mean                sum         
              age                age         
sex        female       male  female     male
class                                        
First   34.611765  41.281386  2942.0  4169.42
Second  28.722973  30.740707  2125.5  3043.33
Third   21.750000  26.507589  2218.5  6706.42


In [30]:
# Configure pivot_table so that the result has a MultiIndex on both axes:
#   rows    -> (class, sex)
#   columns -> (aggregation function, value column, survived)
#
# `observed=False` is passed explicitly. When a grouping key is categorical
# (seaborn's `class` column is expected to be -- confirm with titanic.info()),
# recent pandas versions emit a FutureWarning if `observed` is omitted, and the
# default is scheduled to change to True. Stating it keeps today's behaviour.
pivot3 = pd.pivot_table(df, 
                        values=['age', 'fare'], 
                        index=['class', 'sex'], 
                        columns=['survived'], 
                        aggfunc=['mean','sum'],
                        observed=False)
print(pivot3)

mean                                        sum           \
                     age                   fare                 age            
survived               0          1           0           1       0        1   
class  sex                                                                     
First  female  25.666667  34.939024  110.604167  105.978159    77.0  2865.00   
       male    44.581967  36.248000   62.894910   74.637320  2719.5  1449.92   
Second female  36.000000  28.080882   18.250000   22.288989   216.0  1909.50   
       male    33.369048  16.022000   19.488965   21.095100  2803.0   240.33   
Third  female  23.818182  19.329787   19.773093   12.464526  1310.0   908.50   
       male    27.255814  22.274211   12.204469   15.579696  5860.0   846.42   

                                     
                    fare             
survived               0          1  
class  sex                           
First  female   331.8125  9644.0125  
       male    4842.9081  335

In [31]:
# Cross-section on the row index: keep the rows whose first index level
# (class) is 'First'.
print(
    pivot3.xs(key='First', axis='index')
)

mean                                        sum           \
                age                   fare                 age            
survived          0          1           0           1       0        1   
sex                                                                       
female    25.666667  34.939024  110.604167  105.978159    77.0  2865.00   
male      44.581967  36.248000   62.894910   74.637320  2719.5  1449.92   

                                
               fare             
survived          0          1  
sex                             
female     331.8125  9644.0125  
male      4842.9081  3358.6794  


In [33]:
# Cross-section on two row levels at once: class == 'First' and sex == 'male'.
# The key tuple is matched position-by-position against the names in `level`.
first_male_key = ('First', 'male')
print(
    pivot3.xs(
        key=first_male_key,
        level=['class', 'sex'],
    )
)

mean                                 sum                      \
                  age              fare               age                fare   
survived            0       1         0         1       0        1          0   
class sex                                                                       
First male  44.581967  36.248  62.89491  74.63732  2719.5  1449.92  4842.9081   

                       
                       
survived            1  
class sex              
First male  3358.6794  


In [35]:
# Keep only the columns under the top-level 'mean' label. The selection runs
# along the column axis instead of the rows, hence the explicit axis argument.
print(
    pivot3.xs(key='mean', axis='columns')
)

age                   fare            
survived               0          1           0           1
class  sex                                                 
First  female  25.666667  34.939024  110.604167  105.978159
       male    44.581967  36.248000   62.894910   74.637320
Second female  36.000000  28.080882   18.250000   22.288989
       male    33.369048  16.022000   19.488965   21.095100
Third  female  23.818182  19.329787   19.773093   12.464526
       male    27.255814  22.274211   12.204469   15.579696
