In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8*'
# and later releases dropped the old names, so use the legacy name only when
# this matplotlib still ships it and fall back to the renamed sheet otherwise.
plt.style.use('seaborn' if 'seaborn' in plt.style.available else 'seaborn-v0_8')
# Enlarge all seaborn/matplotlib text for the plots below.
sns.set(font_scale=2.5)

import missingno as msno

# ignore warnings
import warnings
warnings.filterwarnings('ignore')

%matplotlib inline

In [None]:
# Read the training and test CSV files into DataFrames (train first, then test).
df_train, df_test = (
    pd.read_csv(csv_path)
    for csv_path in ('../input/titanic/train.csv', '../input/titanic/test.csv')
)

In [None]:
 # Preview the first five rows of the training frame.
 df_train.head(n=5)

In [None]:
 # Summary statistics of the numeric columns (quartiles spelled out explicitly).
 df_train.describe(percentiles=[0.25, 0.5, 0.75])

In [None]:
# Number of rows and columns in the training frame.
n_rows, n_cols = df_train.shape
(n_rows, n_cols)

In [None]:
# Print, for every column of df_train, the percentage of rows that are NaN.
# isnull().mean() is the fraction of missing rows per column, computed once
# for the whole frame instead of once per loop iteration.
nan_percent = df_train.isnull().mean() * 100
for col in df_train.columns:
    # '{:.2f}' rounds to two decimals; the previous '{:2f}' only set a minimum
    # field width of 2 and printed six decimal places.
    msg = 'column: {:>10}\t Percent of NaN Value: {:.2f}%'.format(col, nan_percent[col])
    print(msg)

In [None]:
# Absolute number of missing values in each column.
# Computed for the whole frame so the cell does not depend on a loop variable
# left over from another cell (which only works if that cell ran first).
df_train.isnull().sum()

In [None]:
# Nullity matrix of the full training frame (all rows, all columns).
msno.matrix(
    df=df_train,
    figsize=(8, 8),
    color=(0.8, 0.5, 0.2),
)

In [None]:
# Bar chart of non-missing counts per column for the full training frame.
msno.bar(
    df=df_train,
    figsize=(8, 8),
    color=(0.8, 0.5, 0.2),
)

In [None]:
# Two-panel overview of the 'Survived' column: share of each value as a pie
# chart (left) and raw counts as a bar chart (right).
f, ax = plt.subplots(1, 2, figsize=(18, 8))

df_train['Survived'].value_counts().plot.pie(explode=[0, 0.1], autopct='%1.1f%%', ax=ax[0], shadow=True)
ax[0].set_title('Pie plot - Survived')
ax[0].set_ylabel('')
# The column must be passed as x=...: recent seaborn releases take `data` as
# the first positional parameter, so a positional column name collides with
# the data= keyword.
sns.countplot(x='Survived', data=df_train, ax=ax[1])
ax[1].set_title('Count plot - Survived')
plt.show()

In [None]:
# Stand-alone pie chart of how often each 'Survived' value occurs.
survived_counts = df_train['Survived'].value_counts()
survived_counts.plot.pie(explode=[0, 0.1], autopct='%1.1f%%', shadow=True)

**2.1 Pclass**

In [None]:
# Number of non-null 'Survived' entries in each passenger class.
df_train.groupby('Pclass')[['Survived']].count()

In [None]:
# Sum of 'Survived' within each passenger class.
df_train.groupby('Pclass')[['Survived']].sum()

In [None]:
# Pclass x Survived contingency table with row/column totals, colour-graded.
pclass_vs_survived = pd.crosstab(
    index=df_train['Pclass'],
    columns=df_train['Survived'],
    margins=True,
)
pclass_vs_survived.style.background_gradient(cmap='summer_r')

In [None]:
# Mean of 'Survived' per passenger class, highest first, drawn as bars.
survival_rate_by_pclass = (
    df_train.groupby('Pclass')[['Survived']]
    .mean()
    .sort_values(by='Survived', ascending=False)
)
survival_rate_by_pclass.plot.bar()

In [None]:
# Per-class count of non-null 'Survived' entries, for reference next to the plots.
df_train.groupby('Pclass')[['Survived']].count()

In [None]:
# Left: passenger count per class. Right: per-class counts split by 'Survived'.
y_position = 1.02  # lift the titles slightly above the axes
f, ax = plt.subplots(1, 2, figsize=(18, 8))
df_train['Pclass'].value_counts().plot.bar(color=['#CD7F32', '#FFDF00', '#D3D3D3'], ax=ax[0])
ax[0].set_title('Number of passengers By Pclass', y=y_position)
ax[0].set_ylabel('Count')
# The column must be passed as x=...: recent seaborn releases take `data` as
# the first positional parameter, so a positional column name collides with
# the data= keyword.
sns.countplot(x='Pclass', hue='Survived', data=df_train, ax=ax[1])
ax[1].set_title('Pclass: Survived vs Dead', y=y_position)
plt.show()

**2.2 Sex**

In [None]:
# Left: mean of 'Survived' per sex. Right: per-sex counts split by 'Survived'.
f, ax = plt.subplots(1, 2, figsize=(18, 8))
df_train[['Sex', 'Survived']].groupby(['Sex'], as_index=True).mean().plot.bar(ax=ax[0])
ax[0].set_title('Survived vs Sex')
# The column must be passed as x=...: recent seaborn releases take `data` as
# the first positional parameter, so a positional column name collides with
# the data= keyword.
sns.countplot(x='Sex', hue='Survived', data=df_train, ax=ax[1])
ax[1].set_title('Sex: Survived vs Dead')
plt.show()

In [None]:
# Mean of 'Survived' for each value of 'Sex'.
df_train.groupby('Sex')[['Survived']].mean()

In [None]:
# Sex x Survived contingency table with row/column totals, colour-graded.
sex_vs_survived = pd.crosstab(
    index=df_train['Sex'],
    columns=df_train['Survived'],
    margins=True,
)
sex_vs_survived.style.background_gradient(cmap='summer_r')

In [None]:
# Mean 'Survived' per Pclass, one line per Sex.
# sns.factorplot was renamed to catplot and later removed from seaborn.
# catplot defaults to a strip plot and to height=5, so kind='point' and
# height=4 are given explicitly to keep factorplot's old defaults.
sns.catplot(x='Pclass', y='Survived', hue='Sex', data=df_train, kind='point', height=4, aspect=1.5)

- Ladies first.
- Money brings survival?

In [None]:
# Mean 'Survived' per Sex, one panel per Pclass.
# sns.factorplot was renamed to catplot (default kind is now 'strip', so
# kind='point' is explicit) and its `size` argument became `height`.
# NOTE(review): the former saturation=.5 is a fill-colour option that does not
# apply to point plots and is presumably rejected by newer seaborn, so it is
# dropped here - confirm against the installed version.
sns.catplot(x='Sex', y='Survived', col='Pclass', data=df_train, kind='point', height=9, aspect=1)

**2.3 Age**

In [None]:
# Print the maximum, minimum and mean of the 'Age' column. The Korean labels
# read "oldest passenger", "youngest passenger" and "average passenger age".
age = df_train['Age']
for template, value in (
    ("제일 나이 많은 탑승객: {:.1f} years", age.max()),
    ("제일 어린 탑승객: {:.1f} years", age.min()),
    ("탑승객 평균 나이: {:.1f} years", age.mean()),
):
    print(template.format(value))

In [None]:
# Overlay the age density of rows with Survived == 1 and Survived == 0
# (drawn in that order, matching the legend entries).
fig, ax = plt.subplots(1, 1, figsize=(9, 5))
for survived_flag in (1, 0):
    ages = df_train.loc[df_train['Survived'] == survived_flag, 'Age']
    sns.kdeplot(ages, ax=ax)
plt.legend(['Survived == 1', 'Survived == 0'])
plt.show()

In [None]:
# Histogram of 'Age' restricted to rows where Survived == 1.
df_train.loc[df_train['Survived'] == 1, 'Age'].hist()

In [None]:
# Age density for each passenger class on one figure; classes are drawn in
# the order 1, 2, 3 so the curves line up with the legend entries.
plt.figure(figsize=(8, 6))
for pclass in (1, 2, 3):
    df_train.loc[df_train['Pclass'] == pclass, 'Age'].plot(kind='kde')

plt.xlabel('Age')  # pyplot counterpart of ax.set_xlabel()
plt.title('Age Distribution within classes')
plt.legend(['1st Class', '2nd Class', '3rd Class'])

In [None]:
# Age density within Pclass == 1, one curve per 'Survived' value
# (0 first, then 1, matching the legend entries).
fig, ax = plt.subplots(1, 1, figsize=(9, 5))
in_first_class = df_train['Pclass'] == 1
for survived_flag in (0, 1):
    selected = (df_train['Survived'] == survived_flag) & in_first_class
    sns.kdeplot(df_train.loc[selected, 'Age'], ax=ax)
plt.legend(['Survived == 0', 'Survived == 1'])
plt.show()