# [10 Minutes to pandas — pandas 0.20.3 documentation](https://pandas.pydata.org/pandas-docs/stable/10min.html)


10분 만에 따라 할 수 있는 판다스 튜토리얼이지만, 실제로는 1~2시간이 걸린다.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Build a Series from a plain list that contains one missing value (np.nan).
# The output below shows a default 0..5 integer index and dtype float64.
s = pd.Series([1, 3, 5, np.nan, 6, 8])
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [2]:
# Six consecutive dates starting at 2013-01-01; the output below shows a
# DatetimeIndex with daily frequency.
dates = pd.date_range(start="20130101", periods=6)
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [3]:
# 6x4 frame of random normal values, indexed by `dates`, with columns A-D.
# No seed is set, so the numbers differ on every run.
df = pd.DataFrame(
    np.random.randn(6, 4),
    index=dates,
    columns=list("ABCD"),
)

In [4]:
# Build a DataFrame from a dict whose values have different types.
# Scalars (A, B, F) are repeated for every row; C, D and E each supply
# four values, so the frame has four rows.
df2 = pd.DataFrame(
    {
        "A": 1.0,
        "B": pd.Timestamp("20130102"),
        "C": pd.Series(1, index=list(range(4)), dtype="float32"),
        "D": np.array([3] * 4, dtype="int32"),
        "E": pd.Categorical(["test", "train", "test", "train"]),
        "F": "foo",
    }
)


In [5]:
# Show the dtype of each column of df2.
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

In [6]:
# Show the first rows of df (five rows appear in the output below).
df.head()

Unnamed: 0,A,B,C,D
2013-01-01,0.697773,-0.453007,0.922104,-2.278752
2013-01-02,-1.706281,-0.669011,1.55494,-0.465819
2013-01-03,-0.24587,-0.405325,0.273119,1.10387
2013-01-04,-0.337493,0.983144,1.170158,-0.768357
2013-01-05,-0.448749,-0.372132,-1.679186,-0.446368


In [7]:
# Show the last 3 rows of df.
df.tail(3)

Unnamed: 0,A,B,C,D
2013-01-04,-0.337493,0.983144,1.170158,-0.768357
2013-01-05,-0.448749,-0.372132,-1.679186,-0.446368
2013-01-06,-1.496089,-1.495113,-1.139425,-1.31102


In [8]:
# Show the row index of df (the DatetimeIndex passed in at construction).
df.index

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [9]:
# Show the column labels of df.
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [10]:
# Show the underlying data as a NumPy array (index and column labels are dropped).
df.values

array([[ 0.69777334, -0.45300723,  0.92210387, -2.27875237],
       [-1.70628075, -0.66901078,  1.55494006, -0.46581935],
       [-0.24586952, -0.40532483,  0.27311898,  1.10386995],
       [-0.33749282,  0.98314386,  1.17015825, -0.76835744],
       [-0.44874947, -0.37213229, -1.67918601, -0.4463684 ],
       [-1.49608853, -1.49511319, -1.13942538, -1.31102038]])

In [11]:
# Per-column summary statistics: count, mean, std, min, quartiles and max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,-0.589451,-0.401907,0.183618,-0.694408
std,0.886293,0.798539,1.313594,1.117444
min,-1.706281,-1.495113,-1.679186,-2.278752
25%,-1.234254,-0.61501,-0.786289,-1.175355
50%,-0.393121,-0.429166,0.597611,-0.617088
75%,-0.268775,-0.38043,1.108145,-0.451231
max,0.697773,0.983144,1.55494,1.10387


In [12]:
# Transpose df: the dates become columns and A-D become the row labels.
df.T

Unnamed: 0,2013-01-01 00:00:00,2013-01-02 00:00:00,2013-01-03 00:00:00,2013-01-04 00:00:00,2013-01-05 00:00:00,2013-01-06 00:00:00
A,0.697773,-1.706281,-0.24587,-0.337493,-0.448749,-1.496089
B,-0.453007,-0.669011,-0.405325,0.983144,-0.372132,-1.495113
C,0.922104,1.55494,0.273119,1.170158,-1.679186,-1.139425
D,-2.278752,-0.465819,1.10387,-0.768357,-0.446368,-1.31102


In [13]:
# Sort along axis=1 (the column labels) in descending order: D, C, B, A.
df.sort_index(axis=1, ascending=False)

Unnamed: 0,D,C,B,A
2013-01-01,-2.278752,0.922104,-0.453007,0.697773
2013-01-02,-0.465819,1.55494,-0.669011,-1.706281
2013-01-03,1.10387,0.273119,-0.405325,-0.24587
2013-01-04,-0.768357,1.170158,0.983144,-0.337493
2013-01-05,-0.446368,-1.679186,-0.372132,-0.448749
2013-01-06,-1.31102,-1.139425,-1.495113,-1.496089


In [14]:
# Sort the rows by the values in column B (ascending in the output below).
df.sort_values(by='B')

Unnamed: 0,A,B,C,D
2013-01-06,-1.496089,-1.495113,-1.139425,-1.31102
2013-01-02,-1.706281,-0.669011,1.55494,-0.465819
2013-01-01,0.697773,-0.453007,0.922104,-2.278752
2013-01-03,-0.24587,-0.405325,0.273119,1.10387
2013-01-05,-0.448749,-0.372132,-1.679186,-0.446368
2013-01-04,-0.337493,0.983144,1.170158,-0.768357


# Selection

* .at, .iat, .loc, .iloc, .ix (.ix is deprecated as of pandas 0.20; prefer .loc / .iloc)

## Getting

In [15]:
# Indexing with a list of column names returns a DataFrame holding just those columns.
df[['A']]

Unnamed: 0,A
2013-01-01,0.697773
2013-01-02,-1.706281
2013-01-03,-0.24587
2013-01-04,-0.337493
2013-01-05,-0.448749
2013-01-06,-1.496089


In [16]:
# An integer slice inside [] selects rows by position: here the first three rows.
df[0:3]

Unnamed: 0,A,B,C,D
2013-01-01,0.697773,-0.453007,0.922104,-2.278752
2013-01-02,-1.706281,-0.669011,1.55494,-0.465819
2013-01-03,-0.24587,-0.405325,0.273119,1.10387


In [17]:
# A slice of date strings selects rows by label; both endpoints are included.
df['20130102':'20130104']

Unnamed: 0,A,B,C,D
2013-01-02,-1.706281,-0.669011,1.55494,-0.465819
2013-01-03,-0.24587,-0.405325,0.273119,1.10387
2013-01-04,-0.337493,0.983144,1.170158,-0.768357


## Selection by Label

In [18]:
# Select a single row by its index label; the result is a Series of that row's values.
df.loc[dates[0]]

A    0.697773
B   -0.453007
C    0.922104
D   -2.278752
Name: 2013-01-01 00:00:00, dtype: float64

In [19]:
# All rows, restricted to columns A and B, selected by label.
df.loc[:, ['A','B']]

Unnamed: 0,A,B
2013-01-01,0.697773,-0.453007
2013-01-02,-1.706281,-0.669011
2013-01-03,-0.24587,-0.405325
2013-01-04,-0.337493,0.983144
2013-01-05,-0.448749,-0.372132
2013-01-06,-1.496089,-1.495113


In [20]:
# Label slice on the rows (both endpoints included) combined with a list of columns.
df.loc['20130101':'20130105', ['A','B']]

Unnamed: 0,A,B
2013-01-01,0.697773,-0.453007
2013-01-02,-1.706281,-0.669011
2013-01-03,-0.24587,-0.405325
2013-01-04,-0.337493,0.983144
2013-01-05,-0.448749,-0.372132


In [21]:
# A single row label with a list of columns reduces the result to a Series.
df.loc['20130103', ['A','B']]

A   -0.245870
B   -0.405325
Name: 2013-01-03 00:00:00, dtype: float64

In [22]:
# A row label plus a column label returns a single scalar value.
df.loc[dates[0], 'A']

0.69777334215178755

In [23]:
# .at accesses one scalar by row and column label; the value matches
# df.loc[dates[0], 'A'].
df.at[dates[0], 'A']

0.69777334215178755

### Selection by Position

In [24]:
# Select the row at integer position 3 (the fourth row, 2013-01-04).
df.iloc[3]

A   -0.337493
B    0.983144
C    1.170158
D   -0.768357
Name: 2013-01-04 00:00:00, dtype: float64

In [25]:
# Integer-position slices: rows 3-4 and columns 0-1 (the end positions are excluded).
df.iloc[3:5, 0:2]

Unnamed: 0,A,B
2013-01-04,-0.337493,0.983144
2013-01-05,-0.448749,-0.372132


In [26]:
# Rows at positions 1 and 2, with all columns.
df.iloc[1:3, :]

Unnamed: 0,A,B,C,D
2013-01-02,-1.706281,-0.669011,1.55494,-0.465819
2013-01-03,-0.24587,-0.405325,0.273119,1.10387


In [27]:
# Scalar at row position 1, column position 1.
df.iloc[1,1]

-0.66901078463061026

In [28]:
df.iat[1,1]  # Same result as df.iloc[1,1]; iat is meant for slightly faster access to a single scalar.

-0.66901078463061026

### Boolean Indexing

In [29]:
# Boolean mask on one column: keep only the rows where A is greater than 0.
df[df.A > 0]

Unnamed: 0,A,B,C,D
2013-01-01,0.697773,-0.453007,0.922104,-2.278752


In [30]:
df[df > 0] # Values that are not greater than 0 (here, the negative ones) are shown as NaN.

Unnamed: 0,A,B,C,D
2013-01-01,0.697773,,0.922104,
2013-01-02,,,1.55494,
2013-01-03,,,0.273119,1.10387
2013-01-04,,0.983144,1.170158,
2013-01-05,,,,
2013-01-06,,,,


In [31]:
# Work on a copy so that adding a column leaves `df` untouched, then attach
# a string column E (one label per row) and display the result.
df2 = df.copy()
df2["E"] = [
    "one",
    "one",
    "two",
    "three",
    "four",
    "three",
]
df2

Unnamed: 0,A,B,C,D,E
2013-01-01,0.697773,-0.453007,0.922104,-2.278752,one
2013-01-02,-1.706281,-0.669011,1.55494,-0.465819,one
2013-01-03,-0.24587,-0.405325,0.273119,1.10387,two
2013-01-04,-0.337493,0.983144,1.170158,-0.768357,three
2013-01-05,-0.448749,-0.372132,-1.679186,-0.446368,four
2013-01-06,-1.496089,-1.495113,-1.139425,-1.31102,three


### Setting