In [1]:
import numpy as np
import pandas as pd
import seaborn as sns

# Load seaborn's 'titanic' example dataset into a DataFrame.
# NOTE(review): load_dataset presumably downloads from seaborn's online data
# repository on first use, so a fresh-kernel run may need network access — confirm.
df = sns.load_dataset('titanic')

## apply

In [3]:
# Preview the first rows to see which columns are available.
df.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [4]:
# Count how many passengers fall into each `who` category (man / woman / child).
df['who'].value_counts()

who
man      537
woman    271
child     83
Name: count, dtype: int64

In [5]:
def trans_who(x):
    """Translate a ``who`` label into its Korean equivalent.

    ``'man'`` becomes ``'남자'`` and ``'woman'`` becomes ``'여자'``. Every other
    value (including ``'child'``) falls through to ``'아이'`` (child).
    """
    # Checked in the same order as the labels are listed; first match wins.
    for english, korean in (('man', '남자'), ('woman', '여자')):
        if x == english:
            return korean
    return '아이'

In [6]:
# Series.apply calls trans_who once per element and returns a NEW Series;
# the result is not assigned back, so df['who'] itself is left unchanged.
df['who'].apply(trans_who)

0      남자
1      여자
2      여자
3      여자
4      남자
       ..
886    남자
887    여자
888    여자
889    남자
890    남자
Name: who, Length: 891, dtype: object

In [7]:
# Display the column again: it still holds the original English labels because
# the translated Series from the previous cell was never assigned back.
df['who']

0        man
1      woman
2      woman
3      woman
4        man
       ...  
886      man
887    woman
888    woman
889      man
890      man
Name: who, Length: 891, dtype: object

In [8]:
def fare_age(x):
    """Return the fare-to-age ratio for a single record.

    ``x`` is any object indexable by the keys ``'fare'`` and ``'age'``
    (for example one row passed in by ``DataFrame.apply(..., axis=1)``).
    """
    fare = x['fare']
    age = x['age']
    return fare / age

In [10]:
# axis=1 hands each ROW to fare_age, so the function can read several columns
# at once. Some results are NaN (e.g. index 888), presumably where age is missing.
# Row-wise apply makes one Python call per row; the vectorized expression
# df['fare'] / df['age'] should produce the same values much faster.
df.apply(fare_age, axis=1)

0      0.329545
1      1.875876
2      0.304808
3      1.517143
4      0.230000
         ...   
886    0.481481
887    1.578947
888         NaN
889    1.153846
890    0.242188
Length: 891, dtype: float64

In [11]:
# Check which values `survived` takes (0 / 1) before mapping them to labels.
df['survived'].value_counts()

survived
0    549
1    342
Name: count, dtype: int64

In [12]:
# An inline lambda replaces a named function for a one-off mapping:
# 1 -> '생존' (survived), anything else -> '사망' (died).
df['survived'].apply(lambda x: '생존' if x == 1 else '사망')

0      사망
1      생존
2      생존
3      생존
4      사망
       ..
886    사망
887    생존
888    사망
889    생존
890    사망
Name: survived, Length: 891, dtype: object

## groupby

In [13]:
# Show the first rows again as a reference for the groupby examples below.
df.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [14]:
# Mean of every numeric column per `sex` group. numeric_only=True leaves out
# the non-numeric columns (e.g. `embarked`, `who`); boolean columns such as
# `adult_male` and `alone` are still averaged.
df.groupby('sex').mean(numeric_only=True)

Unnamed: 0_level_0,survived,pclass,age,sibsp,parch,fare,adult_male,alone
sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
female,0.742038,2.159236,27.915709,0.694268,0.649682,44.479818,0.0,0.401274
male,0.188908,2.389948,30.726645,0.429809,0.235702,25.523893,0.930676,0.712305


In [15]:
# Grouping by a list of keys yields one row per (sex, pclass) combination,
# indexed by a two-level MultiIndex.
df.groupby(['sex', 'pclass']).mean(numeric_only=True)

Unnamed: 0_level_0,Unnamed: 1_level_0,survived,age,sibsp,parch,fare,adult_male,alone
sex,pclass,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
female,1,0.968085,34.611765,0.553191,0.457447,106.125798,0.0,0.361702
female,2,0.921053,28.722973,0.486842,0.605263,21.970121,0.0,0.421053
female,3,0.5,21.75,0.895833,0.798611,16.11881,0.0,0.416667
male,1,0.368852,41.281386,0.311475,0.278689,67.226127,0.97541,0.614754
male,2,0.157407,30.740707,0.342593,0.222222,19.741782,0.916667,0.666667
male,3,0.135447,26.507589,0.498559,0.224784,12.661633,0.919308,0.760807


In [17]:
# Select the column BEFORE aggregating so only `survived` is averaged.
# numeric_only is unnecessary once a single numeric column has been selected.
df.groupby(['sex', 'pclass'])['survived'].mean()

# Same result, but this form averages every numeric column first and then
# discards all but one:
# df.groupby(['sex', 'pclass']).mean(numeric_only=True)['survived']

sex     pclass
female  1         0.968085
        2         0.921053
        3         0.500000
male    1         0.368852
        2         0.157407
        3         0.135447
Name: survived, dtype: float64

In [18]:
# Double brackets select a one-column DataFrame, so the aggregated result is
# already a DataFrame (no pd.DataFrame(...) wrapper needed) and only `survived`
# is averaged instead of every numeric column.
df.groupby(['sex', 'pclass'])[['survived']].mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,survived
sex,pclass,Unnamed: 2_level_1
female,1,0.968085
female,2,0.921053
female,3,0.5
male,1,0.368852
male,2,0.157407
male,3,0.135447


In [20]:
# [[...]] keeps the result a DataFrame. Selecting the columns before .mean()
# avoids averaging columns that would be thrown away afterwards.
df.groupby(['sex', 'pclass'])[['survived', 'fare']].mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,survived,fare
sex,pclass,Unnamed: 2_level_1,Unnamed: 3_level_1
female,1,0.968085,106.125798
female,2,0.921053,21.970121
female,3,0.5,16.11881
male,1,0.368852,67.226127
male,2,0.157407,19.741782
male,3,0.135447,12.661633


## reset_index
- 인덱스 초기화: 인덱스(여기서는 groupby 키 `sex`, `pclass`)를 일반 열로 되돌리고, 0부터 시작하는 기본 정수 인덱스를 새로 할당

In [21]:
# reset_index() moves the (sex, pclass) index levels back into ordinary columns
# and turns the Series into a DataFrame with a default 0..n-1 integer index.
df.groupby(['sex', 'pclass'])['survived'].mean().reset_index()

Unnamed: 0,sex,pclass,survived
0,female,1,0.968085
1,female,2,0.921053
2,female,3,0.5
3,male,1,0.368852
4,male,2,0.157407
5,male,3,0.135447


In [25]:
# Passing a list to agg() applies each function to each selected column; the
# resulting columns form a two-level header of (column, function).
df.groupby(['sex', 'pclass'])[['survived', 'age']].agg(['mean', 'sum'])

Unnamed: 0_level_0,Unnamed: 1_level_0,survived,survived,age,age
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,sum,mean,sum
sex,pclass,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
female,1,0.968085,91,34.611765,2942.0
female,2,0.921053,70,28.722973,2125.5
female,3,0.5,72,21.75,2218.5
male,1,0.368852,45,41.281386,4169.42
male,2,0.157407,17,30.740707,3043.33
male,3,0.135447,47,26.507589,6706.42


## pivot_table

In [26]:
# pivot_table with only index/values: one row per `who` category, and each cell
# is the mean of `survived` (the aggregation applied when aggfunc is omitted).
df.pivot_table(index='who', values='survived')

Unnamed: 0_level_0,survived
who,Unnamed: 1_level_1
child,0.590361
man,0.163873
woman,0.756458


In [27]:
# Same aggregation, but the `who` categories are laid out as columns instead of rows.
df.pivot_table(columns='who', values='survived')

who,child,man,woman
survived,0.590361,0.163873,0.756458


In [28]:
# A list of index keys gives a MultiIndex; the values match
# df.groupby(['sex', 'pclass'])['survived'].mean().
df.pivot_table(index=['sex', 'pclass'], values='survived')

Unnamed: 0_level_0,Unnamed: 1_level_0,survived
sex,pclass,Unnamed: 2_level_1
female,1,0.968085
female,2,0.921053
female,3,0.5
male,1,0.368852
male,2,0.157407
male,3,0.135447


In [29]:
# A list of aggfuncs adds an outer column level per function: here the number
# of survivors (sum) and the survival rate (mean) for each (sex, pclass) group.
df.pivot_table(index=['sex', 'pclass'], values='survived', aggfunc=['sum', 'mean'])

Unnamed: 0_level_0,Unnamed: 1_level_0,sum,mean
Unnamed: 0_level_1,Unnamed: 1_level_1,survived,survived
sex,pclass,Unnamed: 2_level_2,Unnamed: 3_level_2
female,1,91,0.968085
female,2,70,0.921053
female,3,72,0.5
male,1,45,0.368852
male,2,17,0.157407
male,3,47,0.135447
