In [1]:
# Reference: https://pandas.pydata.org
# See also: https://pandas.pydata.org/pandas-docs/stable/comparison_with_r.html
import pandas as pd

In [3]:
# Series: a one-dimensional labelled array holding the values 1..5.
# No index is passed, so the labels are the integers 0..4.
s = pd.Series(range(1, 6))
s

0    1
1    2
2    3
3    4
4    5
dtype: int64

In [4]:
# [] with a single key returns one element; index 0 holds the value 1
s[0]

1

In [5]:
# [] with a slice returns a sub-Series: rows 2, 3 and 4 (the stop value 5 is excluded)
s[2:5]

2    3
3    4
4    5
dtype: int64

In [6]:
# Element-wise test for even values; the result is a boolean Series (a mask)
s % 2 == 0

0    False
1     True
2    False
3     True
4    False
dtype: bool

In [8]:
# date range: five consecutive daily timestamps starting at 2017-08-29
d = pd.date_range("20170829", periods=5)
d

DatetimeIndex(['2017-08-29', '2017-08-30', '2017-08-31', '2017-09-01',
               '2017-09-02'],
              dtype='datetime64[ns]', freq='D')

In [10]:
# freq="2MS" steps two months at a time on month starts; the first value is
# 2017-09-01 rather than the 2017-08-29 start date
pd.date_range("20170829", periods=5, freq="2MS")

DatetimeIndex(['2017-09-01', '2017-11-01', '2018-01-01', '2018-03-01',
               '2018-05-01'],
              dtype='datetime64[ns]', freq='2MS')

In [11]:
# Adding a DateOffset shifts every timestamp by 10 days; the result's freq is None
pd.date_range("20170829", periods=5, freq="2MS") + pd.DateOffset(days=10)

DatetimeIndex(['2017-09-11', '2017-11-11', '2018-01-11', '2018-03-11',
               '2018-05-11'],
              dtype='datetime64[ns]', freq=None)

In [13]:
# DataFrame from a dict: every key becomes a column (date <- d, num <- s)
df = pd.DataFrame(dict(date=d, num=s))
df

Unnamed: 0,date,num
0,2017-08-29,1
1,2017-08-30,2
2,2017-08-31,3
3,2017-09-01,4
4,2017-09-02,5


In [14]:
# First 3 rows of the frame
df.head(3)

Unnamed: 0,date,num
0,2017-08-29,1
1,2017-08-30,2
2,2017-08-31,3


In [15]:
# Last 3 rows of the frame
df.tail(3)

Unnamed: 0,date,num
2,2017-08-31,3
3,2017-09-01,4
4,2017-09-02,5


In [16]:
# Column labels of the frame
df.columns

Index(['date', 'num'], dtype='object')

In [17]:
# Row labels of the frame; here a RangeIndex running 0..4
df.index

RangeIndex(start=0, stop=5, step=1)

In [18]:
# NumPy array of the frame's data. to_numpy() is the accessor the pandas
# documentation recommends over the .values attribute; because the columns mix
# datetimes and integers, the result is an object-dtype array.
df.to_numpy()

array([[Timestamp('2017-08-29 00:00:00'), 1],
       [Timestamp('2017-08-30 00:00:00'), 2],
       [Timestamp('2017-08-31 00:00:00'), 3],
       [Timestamp('2017-09-01 00:00:00'), 4],
       [Timestamp('2017-09-02 00:00:00'), 5]], dtype=object)

In [19]:
# dtype of each column
df.dtypes

date    datetime64[ns]
num              int64
dtype: object

In [22]:
# Build a DataFrame row by row: each dict is one record and its keys become the columns
d1 = pd.DataFrame([dict(A=1, B=2), dict(A=3, B=4)])
d1

Unnamed: 0,A,B
0,1,2
1,3,4


In [25]:
# Rename the columns to Korean labels: 날짜 = "date", 번호 = "number".
# An explicit old -> new mapping is used instead of assigning a positional list to
# df.columns, so each new label is tied to the column it replaces rather than to
# that column's position, and the previously displayed frame is not mutated in place.
df = df.rename(columns={"date": "날짜", "num": "번호"})
df

Unnamed: 0,날짜,번호
0,2017-08-29,1
1,2017-08-30,2
2,2017-08-31,3
3,2017-09-01,4
4,2017-09-02,5


In [27]:
# Replace the integer row labels with the letters A..E (one label per existing row)
df.index = pd.Index(["A", "B", "C", "D", "E"])
df

Unnamed: 0,날짜,번호
A,2017-08-29,1
B,2017-08-30,2
C,2017-08-31,3
D,2017-09-01,4
E,2017-09-02,5


In [28]:
# Show the frame indexed by the 날짜 (date) column; the result is only displayed,
# not assigned, so df keeps its A..E index
df.set_index('날짜')

Unnamed: 0_level_0,번호
날짜,Unnamed: 1_level_1
2017-08-29,1
2017-08-30,2
2017-08-31,3
2017-09-01,4
2017-09-02,5


In [29]:
# Sort rows by the 날짜 (date) column, newest first; the result is only displayed, not assigned
df.sort_values(by='날짜', ascending=False)

Unnamed: 0,날짜,번호
E,2017-09-02,5
D,2017-09-01,4
C,2017-08-31,3
B,2017-08-30,2
A,2017-08-29,1


In [30]:
# A list of column names selects those columns, in the order listed
df[['번호', '날짜']]

Unnamed: 0,번호,날짜
A,1,2017-08-29
B,2,2017-08-30
C,3,2017-08-31
D,4,2017-09-01
E,5,2017-09-02


In [31]:
# selecting rows
# .loc with a single row label returns that row as a Series named after the label
df.loc['A']

날짜    2017-08-29 00:00:00
번호                      1
Name: A, dtype: object

In [32]:
# .loc with a list of labels returns those rows as a DataFrame
df.loc[['A', 'B']]

Unnamed: 0,날짜,번호
A,2017-08-29,1
B,2017-08-30,2


In [33]:
# .loc with a label slice includes both endpoints: rows A through C
df.loc["A" : "C"]

Unnamed: 0,날짜,번호
A,2017-08-29,1
B,2017-08-30,2
C,2017-08-31,3


In [34]:
# .loc[row label, column label] returns a single value
df.loc["A", "날짜"]

Timestamp('2017-08-29 00:00:00')

In [35]:
%%timeit
# Time scalar access by row/column label with .at (the cell magic must stay on the first line)
df.at["A", "날짜"]

15.4 µs ± 22.9 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [36]:
%%timeit
# Time the same scalar lookup through .loc, for comparison with the .at timing
df.loc["A", "날짜"]

19.6 µs ± 31.9 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [38]:
# .iloc selects by integer position: row 0, column 0
df.iloc[0, 0]

Timestamp('2017-08-29 00:00:00')

In [39]:
# .iat: scalar access by integer position (row 0, column 0)
df.iat[0, 0]

Timestamp('2017-08-29 00:00:00')