## Summarize Data

In [75]:
import pandas as pd
import seaborn as sns
import numpy as np

In [76]:
# Load the 'iris' sample dataset through seaborn and show its (rows, columns) shape.
df = sns.load_dataset('iris')
df.shape

(150, 5)

In [77]:
df.shape[0]    # number of rows; same value as len(df)

150

In [78]:
df.head()    # preview the first rows of the frame

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [10]:
df['species'].value_counts()    # number of rows for each distinct species value

setosa        50
virginica     50
versicolor    50
Name: species, dtype: int64

In [11]:
df['species'].nunique()    # count of distinct values in the species column

3

In [13]:
df['sepal_length'].sum()    # total of the sepal_length column

876.5

In [35]:
df['sepal_length'].max()    # largest value in the sepal_length column

7.9

In [30]:
df['petal_length'].median()    # median (middle value) of a single column

4.35

In [44]:
# Median (middle value) of every numeric column.
# numeric_only=True skips the string 'species' column; without it,
# pandas >= 2.0 raises TypeError instead of silently dropping that column.
df.median(numeric_only=True)

sepal_length    5.80
sepal_width     3.00
petal_length    4.35
petal_width     1.30
dtype: float64

In [31]:
df['petal_length'].mean()    # mean (average) of a single column

3.7580000000000005

In [43]:
# Mean (average) of every numeric column.
# numeric_only=True skips the string 'species' column; without it,
# pandas >= 2.0 raises TypeError instead of silently dropping that column.
df.mean(numeric_only=True)

sepal_length    5.843333
sepal_width     3.057333
petal_length    3.758000
petal_width     1.199333
dtype: float64

In [45]:
# Variance of every numeric column.
# numeric_only=True skips the string 'species' column; without it,
# pandas >= 2.0 raises TypeError instead of silently dropping that column.
df.var(numeric_only=True)

sepal_length    0.685694
sepal_width     0.189979
petal_length    3.116278
petal_width     0.581006
dtype: float64

In [46]:
# Standard deviation of every numeric column.
# numeric_only=True skips the string 'species' column; without it,
# pandas >= 2.0 raises TypeError instead of silently dropping that column.
df.std(numeric_only=True)

sepal_length    0.828066
sepal_width     0.435866
petal_length    1.765298
petal_width     0.762238
dtype: float64

In [36]:
# Value at each requested quantile of the numeric columns
# (0.50 is the median, 1.00 is the maximum).
# numeric_only=True keeps the string 'species' column out of the calculation;
# pandas >= 2.0 no longer excludes it by default and raises TypeError.
df.quantile([0.25, 0.30, 0.50, 1.00], numeric_only=True)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0.25,5.1,2.8,1.6,0.3
0.3,5.27,2.8,1.7,0.4
0.5,5.8,3.0,4.35,1.3
1.0,7.9,4.4,6.9,2.5


In [41]:
# Same quantiles, restricted to two columns: all rows (:), sepal_length and petal_length only.
df.loc[:, ['sepal_length', 'petal_length']].quantile([0.25, 0.30, 0.50, 1.00])

Unnamed: 0,sepal_length,petal_length
0.25,5.1,1.6
0.3,5.27,1.7
0.5,5.8,4.35
1.0,7.9,6.9


In [42]:
# Same quantiles, restricted to the rows labeled 50 through 100.
# .loc slices by label and includes BOTH endpoints, so the row labeled 100 is part of the selection.
# NOTE(review): if only rows 50-99 were intended, the slice should be 50:99 — confirm.
df.loc[50:100, ['sepal_length', 'petal_length']].quantile([0.25, 0.30, 0.50, 1.00])

Unnamed: 0,sepal_length,petal_length
0.25,5.6,4.0
0.3,5.6,4.0
0.5,5.9,4.4
1.0,7.0,6.0


## Describe

In [20]:
# IPython help syntax: a trailing `?` shows the docstring of df.describe.
df.describe?

In [14]:
df.describe()    # summary statistics (count, mean, std, min, quartiles, max) per column

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
count,150.0,150.0,150.0,150.0
mean,5.843333,3.057333,3.758,1.199333
std,0.828066,0.435866,1.765298,0.762238
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


In [16]:
df.describe(include='all')    # also summarize non-numeric columns (unique / top / freq)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
count,150.0,150.0,150.0,150.0,150
unique,,,,,3
top,,,,,setosa
freq,,,,,50
mean,5.843333,3.057333,3.758,1.199333,
std,0.828066,0.435866,1.765298,0.762238,
min,4.3,2.0,1.0,0.1,
25%,5.1,2.8,1.6,0.3,
50%,5.8,3.0,4.35,1.3,
75%,6.4,3.3,5.1,1.8,


In [19]:
# Summarize every column except object-dtype ones.
# The builtin `object` replaces the `np.object` alias, which was deprecated in
# NumPy 1.20 and removed in NumPy 1.24 (where it raises AttributeError).
df.describe(exclude=[object])

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
count,150.0,150.0,150.0,150.0
mean,5.843333,3.057333,3.758,1.199333
std,0.828066,0.435866,1.765298,0.762238
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


In [23]:
df['sepal_width'].describe()    # summary statistics for a single numeric column

count    150.000000
mean       3.057333
std        0.435866
min        2.000000
25%        2.800000
50%        3.000000
75%        3.300000
max        4.400000
Name: sepal_width, dtype: float64

In [24]:
df['species'].describe()    # for a string column: count, unique, top (most frequent value), freq

count        150
unique         3
top       setosa
freq          50
Name: species, dtype: object

## Apply (function)

In [47]:
# IPython help syntax: a trailing `?` shows the docstring of df.apply.
df.apply?

In [51]:
df.head()    # look at the first rows again before applying functions to them

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [52]:
# DataFrame.apply passes each column to the function as a Series.
# .iloc[0] takes that column's first value by position, so it does not
# depend on the index happening to contain the label 0 (as x[0] did).
df.apply(lambda col: col.iloc[0])

sepal_length       5.1
sepal_width        3.5
petal_length       1.4
petal_width        0.2
species         setosa
dtype: object

In [66]:
# Series.apply calls the function once per element; each element here is a
# species name, so name[0] is its first character.
df['species'].apply(lambda name: name[0]).head()

0    s
1    s
2    s
3    s
4    s
Name: species, dtype: object

In [83]:
def sample_last_3(x):
    """Return the last three items of ``x`` (all of ``x`` when it has fewer than three)."""
    return x[-3:]

In [79]:
# New column holding the first three characters of each species name.
df['species_first_3'] = df['species'].apply(lambda name: name[:3])

In [81]:
# Apply the named helper sample_last_3 to every species value and store the result in a new column.
df['species_last_3'] = df['species'].apply(sample_last_3)

In [82]:
# Preview the first rows, including the two new columns, instead of dumping the whole frame.
df.head(10)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,species_first_3,species_last_3
0,5.1,3.5,1.4,0.2,setosa,set,osa
1,4.9,3.0,1.4,0.2,setosa,set,osa
2,4.7,3.2,1.3,0.2,setosa,set,osa
3,4.6,3.1,1.5,0.2,setosa,set,osa
4,5.0,3.6,1.4,0.2,setosa,set,osa
5,5.4,3.9,1.7,0.4,setosa,set,osa
6,4.6,3.4,1.4,0.3,setosa,set,osa
7,5.0,3.4,1.5,0.2,setosa,set,osa
8,4.4,2.9,1.4,0.2,setosa,set,osa
9,4.9,3.1,1.5,0.1,setosa,set,osa
