## Titanic 데이터 분석 연습 문제

In [3]:

import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt

# Titanic 데이터셋 로드
df = sns.load_dataset('titanic')
df.head()


Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


### 🔰 1단계 (기초)

In [None]:

# 1. 전체 탑승객 수
len(df)

# 2. 전체 생존율
df['survived'].mean()

# 3. 성별별 생존율 시각화
sns.barplot(data=df, x='sex', y='survived')
plt.title('Survival Rate by Sex')
plt.show()

# 4. pclass별 탑승객 수 시각화
sns.countplot(data=df, x='pclass')
plt.title('Passenger Count by Class')
plt.show()

# 5. 1등급 여성 생존율
df[(df['pclass']==1) & (df['sex']=='female')]['survived'].mean()


### 🔰 2단계 (중급)

In [4]:

# 6. pclass와 sex 기준 생존율
df.groupby(['pclass','sex'])['survived'].mean()

# 7. pclass vs survived 교차표
pd.crosstab(df['pclass'], df['survived'])

# 8. 나이를 10살 단위로 나눠 생존율 계산
df['age_group'] = pd.cut(df['age'], bins=range(0, 81, 10))
df.groupby('age_group')['survived'].mean()

# 9. 형제/자매 수에 따른 생존율
df.groupby('sibsp')['survived'].mean()

# 10. 요금 상위 25% 승객의 생존율
q3 = df['fare'].quantile(0.75)
df[df['fare'] >= q3]['survived'].mean()


  df.groupby('age_group')['survived'].mean()


np.float64(0.5822222222222222)

In [1]:
list(range(0,81,10))

[0, 10, 20, 30, 40, 50, 60, 70, 80]

### 🔰 3단계 (심화)

In [None]:

# 11. age vs fare 산점도 (생존 여부 색상 구분)
sns.scatterplot(data=df, x='age', y='fare', hue='survived')
plt.title('Age vs Fare with Survival')
plt.show()

# 12. 성별 + 등급별 생존율 히트맵
pivot = df.pivot_table(values='survived', index='sex', columns='pclass', aggfunc='mean')
sns.heatmap(pivot, annot=True, cmap='Blues')
plt.title('Survival Rate by Sex and Class')
plt.show()

# 13. child/adult 컬럼 생성 후 생존율 비교
df['group'] = df['age'].apply(lambda x: 'child' if x < 18 else 'adult')
df.groupby('group')['survived'].mean()

# 14. 1등급 여성 vs 3등급 남성 생존율 비교
first_female = df[(df['pclass']==1) & (df['sex']=='female')]['survived'].mean()
third_male = df[(df['pclass']==3) & (df['sex']=='male')]['survived'].mean()
first_female, third_male

# 15. deck별 생존율 히트맵
deck_pivot = df.pivot_table(values='survived', index='deck', aggfunc='mean')
sns.heatmap(deck_pivot, annot=True, cmap='Greens')
plt.title('Survival Rate by Deck')
plt.show()
