In [5]:
import pandas as pd
import numpy as np

## 1. Object Creation

In [7]:
# Build a Series from a plain list; pandas supplies a default integer index,
# and the NaN entry makes the values float64.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [10]:
# Six consecutive calendar days starting 2013-01-01, used below as a row index.
dates = pd.date_range(start='20130101', periods=6, freq='D')
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [12]:
# 6x4 frame of standard-normal samples, indexed by `dates`, columns 'A'..'D'.
# A seeded generator is used so that Restart & Run All reproduces the same
# numbers on every run (the unseeded np.random.randn call did not).
# NOTE: outputs saved in this notebook came from an earlier unseeded run;
# re-execute the notebook to refresh them.
rng = np.random.default_rng(seed=0)
df = pd.DataFrame(rng.standard_normal((6, 4)), index=dates, columns=list('ABCD'))
df

Unnamed: 0,A,B,C,D
2013-01-01,0.36312,0.446927,-0.180195,-0.099116
2013-01-02,1.406347,0.448027,0.753087,0.198767
2013-01-03,-2.337948,0.970712,2.544795,-0.176152
2013-01-04,-0.85188,-0.874119,-2.20076,-0.305315
2013-01-05,-0.334725,-0.356044,-0.348033,-0.537656
2013-01-06,-1.183454,0.026522,-0.83185,0.09251


In [14]:
# Build a frame from a dict of column specs: the scalar entries ('A', 'B', 'F')
# are broadcast to the length set by the array-like columns ('C', 'D', 'E').
df2 = pd.DataFrame(
    {
        'A': 1.0,
        'B': pd.Timestamp('20130102'),
        'C': pd.Series(1, index=[0, 1, 2, 3], dtype='float32'),
        'D': np.array([3, 3, 3, 3], dtype='int32'),
        'E': pd.Categorical(["test", "train"] * 2),
        'F': 'Foo',
    }
)
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,Foo
1,1.0,2013-01-02,1.0,3,train,Foo
2,1.0,2013-01-02,1.0,3,test,Foo
3,1.0,2013-01-02,1.0,3,train,Foo


In [16]:
# Each column keeps its own dtype; .dtypes lists them as a Series keyed by column name.
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

## 2. Viewing Data

In [17]:
# First rows of the frame (head() returns 5 rows when no count is given).
df.head()

Unnamed: 0,A,B,C,D
2013-01-01,0.36312,0.446927,-0.180195,-0.099116
2013-01-02,1.406347,0.448027,0.753087,0.198767
2013-01-03,-2.337948,0.970712,2.544795,-0.176152
2013-01-04,-0.85188,-0.874119,-2.20076,-0.305315
2013-01-05,-0.334725,-0.356044,-0.348033,-0.537656


In [18]:
# Last rows of the frame (tail() returns 5 rows when no count is given).
df.tail()

Unnamed: 0,A,B,C,D
2013-01-02,1.406347,0.448027,0.753087,0.198767
2013-01-03,-2.337948,0.970712,2.544795,-0.176152
2013-01-04,-0.85188,-0.874119,-2.20076,-0.305315
2013-01-05,-0.334725,-0.356044,-0.348033,-0.537656
2013-01-06,-1.183454,0.026522,-0.83185,0.09251


In [20]:
# Row labels: the DatetimeIndex passed as `index=dates` when df was built.
df.index

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [22]:
# Column labels of the frame.
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [23]:
# Underlying values as a NumPy array; the index and column labels are not included.
df.to_numpy()

array([[ 0.36311972,  0.44692682, -0.1801953 , -0.0991164 ],
       [ 1.40634686,  0.44802707,  0.75308663,  0.19876736],
       [-2.33794835,  0.97071241,  2.54479492, -0.17615221],
       [-0.85187956, -0.87411921, -2.20075986, -0.30531517],
       [-0.33472536, -0.35604368, -0.34803304, -0.53765618],
       [-1.1834539 ,  0.02652245, -0.83185044,  0.09251022]])

In [24]:
# df2 mixes several column dtypes, so the conversion has to pick one dtype that
# can hold every value (object here, as the output shows).
df2.to_numpy()

array([[1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'Foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'Foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'Foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'Foo']],
      dtype=object)

In [25]:
# Per-column summary statistics: count, mean, std, min, quartiles, max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,-0.489757,0.110338,-0.043826,-0.137827
std,1.294203,0.657459,1.595024,0.267295
min,-2.337948,-0.874119,-2.20076,-0.537656
25%,-1.10056,-0.260402,-0.710896,-0.273024
50%,-0.593302,0.236725,-0.264114,-0.137634
75%,0.188658,0.447752,0.519766,0.044604
max,1.406347,0.970712,2.544795,0.198767


In [27]:
# Transpose: swap rows and columns.
df.T

Unnamed: 0,2013-01-01,2013-01-02,2013-01-03,2013-01-04,2013-01-05,2013-01-06
A,0.36312,1.406347,-2.337948,-0.85188,-0.334725,-1.183454
B,0.446927,0.448027,0.970712,-0.874119,-0.356044,0.026522
C,-0.180195,0.753087,2.544795,-2.20076,-0.348033,-0.83185
D,-0.099116,0.198767,-0.176152,-0.305315,-0.537656,0.09251


In [28]:
# Sort along axis 1 (the column labels) in descending order; rows keep their order.
df.sort_index(axis = 1, ascending = False)

Unnamed: 0,D,C,B,A
2013-01-01,-0.099116,-0.180195,0.446927,0.36312
2013-01-02,0.198767,0.753087,0.448027,1.406347
2013-01-03,-0.176152,2.544795,0.970712,-2.337948
2013-01-04,-0.305315,-2.20076,-0.874119,-0.85188
2013-01-05,-0.537656,-0.348033,-0.356044,-0.334725
2013-01-06,0.09251,-0.83185,0.026522,-1.183454


In [29]:
# Reorder rows by the values in column 'B' (ascending by default).
df.sort_values(by='B')

Unnamed: 0,A,B,C,D
2013-01-04,-0.85188,-0.874119,-2.20076,-0.305315
2013-01-05,-0.334725,-0.356044,-0.348033,-0.537656
2013-01-06,-1.183454,0.026522,-0.83185,0.09251
2013-01-01,0.36312,0.446927,-0.180195,-0.099116
2013-01-02,1.406347,0.448027,0.753087,0.198767
2013-01-03,-2.337948,0.970712,2.544795,-0.176152


## 3. Selection

In [43]:
# Getting: selecting a single column by name yields a Series.
# (A stray bare `df` line was removed: only a cell's last expression is
# displayed, so that statement had no effect.)
df['A']

2013-01-01    0.363120
2013-01-02    1.406347
2013-01-03   -2.337948
2013-01-04   -0.851880
2013-01-05   -0.334725
2013-01-06   -1.183454
Freq: D, Name: A, dtype: float64

In [44]:
# Show the full frame for reference before slicing it.
df

Unnamed: 0,A,B,C,D
2013-01-01,0.36312,0.446927,-0.180195,-0.099116
2013-01-02,1.406347,0.448027,0.753087,0.198767
2013-01-03,-2.337948,0.970712,2.544795,-0.176152
2013-01-04,-0.85188,-0.874119,-2.20076,-0.305315
2013-01-05,-0.334725,-0.356044,-0.348033,-0.537656
2013-01-06,-1.183454,0.026522,-0.83185,0.09251


In [39]:
# Integer slice inside [] selects rows by position; the end position is excluded.
df[0:3]

Unnamed: 0,A,B,C,D
2013-01-01,0.36312,0.446927,-0.180195,-0.099116
2013-01-02,1.406347,0.448027,0.753087,0.198767
2013-01-03,-2.337948,0.970712,2.544795,-0.176152


In [42]:
# Slicing rows by date-string labels; unlike positional slices, both endpoints are included.
df['20130103':'20130104']

Unnamed: 0,A,B,C,D
2013-01-03,-2.337948,0.970712,2.544795,-0.176152
2013-01-04,-0.85188,-0.874119,-2.20076,-0.305315


In [45]:
# Selection by label: one row, looked up by its index label, returned as a Series.
df.loc[dates[0]]

A    0.363120
B    0.446927
C   -0.180195
D   -0.099116
Name: 2013-01-01 00:00:00, dtype: float64

In [54]:
# All rows, restricted to the listed column labels.
df.loc[:,['A','B']]

Unnamed: 0,A,B
2013-01-01,0.36312,0.446927
2013-01-02,1.406347,0.448027
2013-01-03,-2.337948,0.970712
2013-01-04,-0.85188,-0.874119
2013-01-05,-0.334725,-0.356044
2013-01-06,-1.183454,0.026522


In [55]:
# Label slice on rows (both endpoints included) combined with a column list.
df.loc['20130102':'20130104', ['A','B']]

Unnamed: 0,A,B
2013-01-02,1.406347,0.448027
2013-01-03,-2.337948,0.970712
2013-01-04,-0.85188,-0.874119


In [56]:
# A single row label plus a column list reduces the result to a Series.
df.loc[dates[0],['A','B']]

A    0.363120
B    0.446927
Name: 2013-01-01 00:00:00, dtype: float64

In [57]:
# A single row label and a single column label give one scalar value.
df.loc[dates[0],'A']

0.3631197210095192

In [58]:
# Selection by position: the row at integer position 3 (zero-based), as a Series.
df.iloc[3]

A   -0.851880
B   -0.874119
C   -2.200760
D   -0.305315
Name: 2013-01-04 00:00:00, dtype: float64

In [63]:
# Positional slices on both axes; end positions are excluded, as with Python slices.
df.iloc[0:2,0:2]

Unnamed: 0,A,B
2013-01-01,0.36312,0.446927
2013-01-02,1.406347,0.448027


In [62]:
# Explicit lists of row positions and column positions.
df.iloc[[1,3,5],[0,2]]

Unnamed: 0,A,C
2013-01-02,1.406347,0.753087
2013-01-04,-0.85188,-2.20076
2013-01-06,-1.183454,-0.83185


In [64]:
# Row positions 1 and 2, every column.
df.iloc[1:3,:]

Unnamed: 0,A,B,C,D
2013-01-02,1.406347,0.448027,0.753087,0.198767
2013-01-03,-2.337948,0.970712,2.544795,-0.176152


In [65]:
# Every row, column positions 1 and 2.
df.iloc[:,1:3]

Unnamed: 0,B,C
2013-01-01,0.446927,-0.180195
2013-01-02,0.448027,0.753087
2013-01-03,0.970712,2.544795
2013-01-04,-0.874119,-2.20076
2013-01-05,-0.356044,-0.348033
2013-01-06,0.026522,-0.83185


In [66]:
# Two integer positions give a single scalar value.
df.iloc[1,1]

0.44802707463171126

In [67]:
# Fast scalar access by position; returns the same value as df.iloc[1,1].
df.iat[1,1]

0.44802707463171126

In [68]:
# Boolean indexing: keep only the rows where column 'A' is positive.
df[df['A']>0]

Unnamed: 0,A,B,C,D
2013-01-01,0.36312,0.446927,-0.180195,-0.099116
2013-01-02,1.406347,0.448027,0.753087,0.198767


In [69]:
# The boolean mask itself: one True/False per row, sharing df's index.
df['A']>0

2013-01-01     True
2013-01-02     True
2013-01-03    False
2013-01-04    False
2013-01-05    False
2013-01-06    False
Freq: D, Name: A, dtype: bool

In [70]:
# Frame-shaped mask: the shape is kept and cells failing the condition become NaN.
df[df>0]

Unnamed: 0,A,B,C,D
2013-01-01,0.36312,0.446927,,
2013-01-02,1.406347,0.448027,0.753087,0.198767
2013-01-03,,0.970712,2.544795,
2013-01-04,,,,
2013-01-05,,,,
2013-01-06,,0.026522,,0.09251


In [72]:
# Work on a copy of df so the edits below leave df untouched.
# Note: this rebinds df2, which earlier held the mixed-dtype example frame.
df2 = df.copy()
df2

Unnamed: 0,A,B,C,D
2013-01-01,0.36312,0.446927,-0.180195,-0.099116
2013-01-02,1.406347,0.448027,0.753087,0.198767
2013-01-03,-2.337948,0.970712,2.544795,-0.176152
2013-01-04,-0.85188,-0.874119,-2.20076,-0.305315
2013-01-05,-0.334725,-0.356044,-0.348033,-0.537656
2013-01-06,-1.183454,0.026522,-0.83185,0.09251


In [75]:
# Add a string column 'E'; the list supplies one value per row, in row order.
df2['E'] = ['one', 'one', 'two', 'three', 'four', 'three']
df2

Unnamed: 0,A,B,C,D,E
2013-01-01,0.36312,0.446927,-0.180195,-0.099116,one
2013-01-02,1.406347,0.448027,0.753087,0.198767,one
2013-01-03,-2.337948,0.970712,2.544795,-0.176152,two
2013-01-04,-0.85188,-0.874119,-2.20076,-0.305315,three
2013-01-05,-0.334725,-0.356044,-0.348033,-0.537656,four
2013-01-06,-1.183454,0.026522,-0.83185,0.09251,three


In [78]:
# Filter with isin(): keep the rows whose 'E' value is one of the listed strings.
df2[df2['E'].isin(['two','four'])]

Unnamed: 0,A,B,C,D,E
2013-01-03,-2.337948,0.970712,2.544795,-0.176152,two
2013-01-05,-0.334725,-0.356044,-0.348033,-0.537656,four


In [80]:
# Setting: a Series of 1..6 on a date index that starts one day later than
# the '20130101' start used for `dates`.
s1 = pd.Series(
    data=list(range(1, 7)),
    index=pd.date_range(start='20130102', periods=6),
)
s1

2013-01-02    1
2013-01-03    2
2013-01-04    3
2013-01-05    4
2013-01-06    5
2013-01-07    6
Freq: D, dtype: int64

In [82]:
# Assigning a Series as a column aligns it on df's index: labels missing from s1
# become NaN, and s1 labels that df does not have are dropped.
df['f'] = s1
df

Unnamed: 0,A,B,C,D,f
2013-01-01,0.36312,0.446927,-0.180195,-0.099116,
2013-01-02,1.406347,0.448027,0.753087,0.198767,1.0
2013-01-03,-2.337948,0.970712,2.544795,-0.176152,2.0
2013-01-04,-0.85188,-0.874119,-2.20076,-0.305315,3.0
2013-01-05,-0.334725,-0.356044,-0.348033,-0.537656,4.0
2013-01-06,-1.183454,0.026522,-0.83185,0.09251,5.0


In [84]:
# Set a single cell by label (row dates[0], column 'A').
df.at[dates[0],'A'] = 0
df

Unnamed: 0,A,B,C,D,f
2013-01-01,0.0,0.446927,-0.180195,-0.099116,
2013-01-02,1.406347,0.448027,0.753087,0.198767,1.0
2013-01-03,-2.337948,0.970712,2.544795,-0.176152,2.0
2013-01-04,-0.85188,-0.874119,-2.20076,-0.305315,3.0
2013-01-05,-0.334725,-0.356044,-0.348033,-0.537656,4.0
2013-01-06,-1.183454,0.026522,-0.83185,0.09251,5.0


In [86]:
# Set a single cell by position (row 0, column 1).
df.iat[0,1] = 0
df

Unnamed: 0,A,B,C,D,f
2013-01-01,0.0,0.0,-0.180195,-0.099116,
2013-01-02,1.406347,0.448027,0.753087,0.198767,1.0
2013-01-03,-2.337948,0.970712,2.544795,-0.176152,2.0
2013-01-04,-0.85188,-0.874119,-2.20076,-0.305315,3.0
2013-01-05,-0.334725,-0.356044,-0.348033,-0.537656,4.0
2013-01-06,-1.183454,0.026522,-0.83185,0.09251,5.0


In [89]:
# Replace column 'D' outright with an integer array of 5s.
# Plain column assignment is used instead of `df.loc[:, 'D'] = ...`: since
# pandas 2.0 the .loc form writes into the existing float64 column in place
# (the values would display as 5.0), whereas df['D'] = ... swaps in the new
# integer array on every pandas version.
df['D'] = np.array([5]*len(df))
df

Unnamed: 0,A,B,C,D,f
2013-01-01,0.0,0.0,-0.180195,5,
2013-01-02,1.406347,0.448027,0.753087,5,1.0
2013-01-03,-2.337948,0.970712,2.544795,5,2.0
2013-01-04,-0.85188,-0.874119,-2.20076,5,3.0
2013-01-05,-0.334725,-0.356044,-0.348033,5,4.0
2013-01-06,-1.183454,0.026522,-0.83185,5,5.0


In [95]:
# Replace every NaN with 0 and rebind df to the filled result.
df = df.fillna(0)
df

Unnamed: 0,A,B,C,D,f
2013-01-01,0.0,0.0,-0.180195,5,0.0
2013-01-02,1.406347,0.448027,0.753087,5,1.0
2013-01-03,-2.337948,0.970712,2.544795,5,2.0
2013-01-04,-0.85188,-0.874119,-2.20076,5,3.0
2013-01-05,-0.334725,-0.356044,-0.348033,5,4.0
2013-01-06,-1.183454,0.026522,-0.83185,5,5.0


In [96]:
# Fresh copy of the current df, so the masked assignment that follows does not modify df.
df2 = df.copy()

In [97]:
# Masked assignment: every positive cell is overwritten with its negation,
# so no positive values remain; other cells are left as they were.
df2[df2>0] = -df2
df2

Unnamed: 0,A,B,C,D,f
2013-01-01,0.0,0.0,-0.180195,-5,0.0
2013-01-02,-1.406347,-0.448027,-0.753087,-5,-1.0
2013-01-03,-2.337948,-0.970712,-2.544795,-5,-2.0
2013-01-04,-0.85188,-0.874119,-2.20076,-5,-3.0
2013-01-05,-0.334725,-0.356044,-0.348033,-5,-4.0
2013-01-06,-1.183454,-0.026522,-0.83185,-5,-5.0
