In [1]:
import pandas as pd
import numpy as np

In [2]:
def basic_check(df):
    '''
    Print a quick overview of a freshly loaded DataFrame.

    Reports, in order:
      * the (rows, columns) shape
      * the number of missing values in each column
      * the ``df.info()`` summary (per-column non-null counts and dtypes)
      * the first and last five rows, rendered with ``display``

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to inspect. It is only read, never modified.

    Returns
    -------
    None
        Everything is written to stdout / the notebook output area.

    Notes
    -----
    ``display`` is not imported here; it is expected to be available as the
    builtin that IPython/Jupyter provides.
    '''
    print('行と列の長さ\n{}'.format(df.shape))
    print('-'*50)
    print('各カラムの欠損値の数\n{}'.format(df.isnull().sum()))
    print('-'*50)
    # info() writes its report to stdout itself and returns None, so wrapping
    # it in print() would append a stray "None" line after the report.
    df.info()
    display(df.head(), df.tail())

In [3]:
# Upper limits on how many columns / rows pandas renders when showing a DataFrame
pd.options.display.max_columns = 2000
pd.options.display.max_rows = 800

In [4]:
import seaborn as sns

# Load seaborn's 'titanic' example dataset and print the overview report for it.
df = sns.load_dataset(name='titanic')
basic_check(df=df)

行と列の長さ
(891, 15)
--------------------------------------------------
各カラムの欠損値の数
survived         0
pclass           0
sex              0
age            177
sibsp            0
parch            0
fare             0
embarked         2
class            0
who              0
adult_male       0
deck           688
embark_town      2
alive            0
alone            0
dtype: int64
--------------------------------------------------
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 15 columns):
survived       891 non-null int64
pclass         891 non-null int64
sex            891 non-null object
age            714 non-null float64
sibsp          891 non-null int64
parch          891 non-null int64
fare           891 non-null float64
embarked       889 non-null object
class          891 non-null category
who            891 non-null object
adult_male     891 non-null bool
deck           203 non-null category
embark_town    889 non-null object
alive       

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
886,0,2,male,27.0,0,0,13.0,S,Second,man,True,,Southampton,no,True
887,1,1,female,19.0,0,0,30.0,S,First,woman,False,B,Southampton,yes,True
888,0,3,female,,1,2,23.45,S,Third,woman,False,,Southampton,no,False
889,1,1,male,26.0,0,0,30.0,C,First,man,True,C,Cherbourg,yes,True
890,0,3,male,32.0,0,0,7.75,Q,Third,man,True,,Queenstown,no,True


## 指定カラム毎の基本統計量を表示

In [5]:
# describe() summary statistics, one row per value of `survived`.
display(
    df.groupby(by=['survived']).describe()
)

Unnamed: 0_level_0,age,age,age,age,age,age,age,age,fare,fare,fare,fare,fare,fare,fare,fare,parch,parch,parch,parch,parch,parch,parch,parch,pclass,pclass,pclass,pclass,pclass,pclass,pclass,pclass,sibsp,sibsp,sibsp,sibsp,sibsp,sibsp,sibsp,sibsp
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max
survived,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2,Unnamed: 29_level_2,Unnamed: 30_level_2,Unnamed: 31_level_2,Unnamed: 32_level_2,Unnamed: 33_level_2,Unnamed: 34_level_2,Unnamed: 35_level_2,Unnamed: 36_level_2,Unnamed: 37_level_2,Unnamed: 38_level_2,Unnamed: 39_level_2,Unnamed: 40_level_2
0,424.0,30.626179,14.17211,1.0,21.0,28.0,39.0,74.0,549.0,22.117887,31.388207,0.0,7.8542,10.5,26.0,263.0,549.0,0.32969,0.823166,0.0,0.0,0.0,0.0,6.0,549.0,2.531876,0.735805,1.0,2.0,3.0,3.0,3.0,549.0,0.553734,1.288399,0.0,0.0,0.0,1.0,8.0
1,290.0,28.34369,14.950952,0.42,19.0,28.0,36.0,80.0,342.0,48.395408,66.596998,0.0,12.475,26.0,57.0,512.3292,342.0,0.464912,0.771712,0.0,0.0,0.0,1.0,5.0,342.0,1.950292,0.863321,1.0,1.0,2.0,3.0,3.0,342.0,0.473684,0.708688,0.0,0.0,0.0,1.0,4.0


In [6]:
# Same grouped summary, transposed so each `survived` value becomes a column
# and each (column, statistic) pair becomes a row.
display(
    df.groupby(by=['survived']).describe().transpose()
)

Unnamed: 0,survived,0,1
age,count,424.0,290.0
age,mean,30.626179,28.34369
age,std,14.17211,14.950952
age,min,1.0,0.42
age,25%,21.0,19.0
age,50%,28.0,28.0
age,75%,39.0,36.0
age,max,74.0,80.0
fare,count,549.0,342.0
fare,mean,22.117887,48.395408
