In [1]:
%pip install seaborn

Collecting seaborn
  Downloading seaborn-0.13.2-py3-none-any.whl.metadata (5.4 kB)
Downloading seaborn-0.13.2-py3-none-any.whl (294 kB)
Installing collected packages: seaborn
Successfully installed seaborn-0.13.2
Note: you may need to restart the kernel to use updated packages.


In [None]:
# 라이브러리 불러오기
import pandas as pd
import seaborn as sns

# # titanic 데이터셋 로딩
titanic = sns.load_dataset('titanic')

# 행 인덱스 0~9 범위에서 age, fare 2개 열을 선택하여 데이터프레임 만들기
df = titanic.loc[0:9, ['age','fare']]

# 데이터프레임 출력
df

Unnamed: 0,age,fare
0,22.0,7.25
1,38.0,71.2833
2,26.0,7.925
3,35.0,53.1
4,35.0,8.05
5,,8.4583
6,54.0,51.8625
7,2.0,21.075
8,27.0,11.1333
9,14.0,30.0708


In [6]:
# 조건식 적용 (20세 미만: age < 20)
df['age'] < 20

0    False
1    False
2    False
3    False
4    False
5    False
6    False
7     True
8    False
9     True
Name: age, dtype: bool

In [32]:
# 불린 인덱싱 (20세 미만의 조건을 충족하는 행을 필터링)
df.query('age < 30')

Unnamed: 0,age,fare
0,22.0,7.25
2,26.0,7.925
7,2.0,21.075
8,27.0,11.1333
9,14.0,30.0708


In [9]:
# 불린 인덱싱 (loc 인덱서 활용)
df.loc[df['age'] < 20]

Unnamed: 0,age,fare
7,2.0,21.075
9,14.0,30.0708


In [11]:
# 불린 인덱싱 (NOT 논리연산)
df.loc[~(df['age'] < 20)]

Unnamed: 0,age,fare
0,22.0,7.25
1,38.0,71.2833
2,26.0,7.925
3,35.0,53.1
4,35.0,8.05
5,,8.4583
6,54.0,51.8625
8,27.0,11.1333


In [23]:
# 나이가 10대(10~19세)인 승객 (AND 논리연산)
mask1 = (titanic.age >= 10) & (titanic.age < 20)
df_teenage = titanic[mask1]
df_teenage.head()
titanic.drop (columns = ['alive'])


Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alone
0,0,3,male,22.0,1,0,7.2500,S,Third,man,True,,Southampton,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,False
2,1,3,female,26.0,0,0,7.9250,S,Third,woman,False,,Southampton,True
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,False
4,0,3,male,35.0,0,0,8.0500,S,Third,man,True,,Southampton,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,S,Second,man,True,,Southampton,True
887,1,1,female,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,True
888,0,3,female,,1,2,23.4500,S,Third,woman,False,,Southampton,False
889,1,1,male,26.0,0,0,30.0000,C,First,man,True,C,Cherbourg,True


In [None]:
# 나이가 10세 미만(0~9세)이고 여성인 승객 (AND 논리연산, loc 인덱서)
mask2 = (titanic.age < 10) & (titanic.sex == 'female')
df_female_under10 = titanic.loc[mask2]
df_female_under10.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
10,1,3,female,4.0,1,1,16.7,S,Third,child,False,G,Southampton,yes,False
24,0,3,female,8.0,3,1,21.075,S,Third,child,False,,Southampton,no,False
43,1,2,female,3.0,1,2,41.5792,C,Second,child,False,,Cherbourg,yes,False
58,1,2,female,5.0,1,2,27.75,S,Second,child,False,,Southampton,yes,False
119,0,3,female,2.0,4,2,31.275,S,Third,child,False,,Southampton,no,False


In [29]:
# 여성이면서 팁이 5달러 이상인 고객
df.query ("gender == 'Female'and tip > 5")

UndefinedVariableError: name 'gender' is not defined

In [None]:
# 1등석, 2등석, 3등석 pclass
# 선실별 승객수   len (데이터프레임)
pd.Series([
    
    len(titanic[titanic['pclass']==1]),
    len(titanic[titanic['pclass']==2]),
    len(titanic[titanic['pclass']==3]),
]
)


0    216
1    184
2    491
dtype: int64

In [10]:
# 나이가 10세 미만(0~9세) 또는 60세 이상인 승객 (OR 논리연산)
# age, sex, alone 열만 선택 (loc 인덱서, fancy indexing)
mask3 = (titanic.age < 10) | (titanic.age >= 60)
df_under10_morethan60 = titanic.loc[mask3, ['age', 'sex', 'alone']]
df_under10_morethan60.head()

Unnamed: 0,age,sex,alone
7,2.0,male,False
10,4.0,female,False
16,2.0,male,False
24,8.0,female,False
33,66.0,male,True


In [None]:
# 불린 시리즈를 새로운 열로 추가
df['age_under_20'] = df['age'] < 20
df.head()


Unnamed: 0,age,fare,age_under_20
0,22.0,7.25,False
1,38.0,71.2833,False
2,26.0,7.925,False
3,35.0,53.1,False
4,35.0,8.05,False
