In [1]:
import pandas as pd
import numpy as np

### Object creation

In [2]:
# One-dimensional labelled array with a default integer index; the NaN entry
# means the values are stored as float64.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [3]:
# Six consecutive calendar days starting on 2013-01-01 (daily frequency).
dates = pd.date_range(start="20130101", periods=6, freq="D")
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [4]:
# 6x4 frame of standard-normal draws, indexed by `dates`, with columns A-D.
# NOTE(review): the generator is not seeded, so re-running this cell produces
# different numbers from the outputs recorded in this notebook.
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list("ABCD"))
df

Unnamed: 0,A,B,C,D
2013-01-01,0.347818,1.666806,1.2398,-0.184007
2013-01-02,0.221471,2.660277,0.267423,0.702617
2013-01-03,0.893913,-1.659499,-0.848859,-0.429259
2013-01-04,0.913977,0.471439,-0.754063,0.096415
2013-01-05,0.225011,0.363907,-0.153874,-0.541511
2013-01-06,-0.446325,1.617627,-0.929726,0.299234


In [5]:
# Build a frame from a dict: each key becomes a column. Values can be scalars,
# a Series, a NumPy array or a Categorical; the scalars are repeated on all
# four rows in the displayed result.
df2 = pd.DataFrame(
    {
        "A": 1.0,  # scalar float -> float64 column
        "B": pd.Timestamp("20130102"),  # scalar timestamp -> datetime64[ns] column
        "C": pd.Series(1, index=list(range(4)), dtype="float32"),  # Series indexed 0-3
        "D": np.array([3] * 4, dtype="int32"),  # length-4 int32 array
        "E": pd.Categorical(["test", "train", "test", "train"]),  # category column
        "F": "foo",  # scalar string -> object column
    }
)

df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [6]:
# Per-column dtypes: every column of df2 carries a different dtype.
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

In [7]:
# Summary statistics; only the numeric columns A, C and D are summarised in the result.
df2.describe()

Unnamed: 0,A,C,D
count,4.0,4.0,4.0
mean,1.0,1.0,3.0
std,0.0,0.0,0.0
min,1.0,1.0,3.0
25%,1.0,1.0,3.0
50%,1.0,1.0,3.0
75%,1.0,1.0,3.0
max,1.0,1.0,3.0


### Viewing data

In [8]:
# First rows of the frame (five when no count is given).
df.head()

Unnamed: 0,A,B,C,D
2013-01-01,0.347818,1.666806,1.2398,-0.184007
2013-01-02,0.221471,2.660277,0.267423,0.702617
2013-01-03,0.893913,-1.659499,-0.848859,-0.429259
2013-01-04,0.913977,0.471439,-0.754063,0.096415
2013-01-05,0.225011,0.363907,-0.153874,-0.541511


In [9]:
# Last two rows of the frame.
df.tail(2)

Unnamed: 0,A,B,C,D
2013-01-05,0.225011,0.363907,-0.153874,-0.541511
2013-01-06,-0.446325,1.617627,-0.929726,0.299234


In [10]:
# Row labels: the DatetimeIndex that was passed in when df was built.
df.index

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [11]:
# Column labels.
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [12]:
# Underlying values as a NumPy array; the index and column labels are not part of it.
df.to_numpy()

array([[ 0.34781771,  1.66680577,  1.23980025, -0.18400738],
       [ 0.22147056,  2.66027731,  0.26742299,  0.702617  ],
       [ 0.89391324, -1.65949923, -0.8488592 , -0.42925947],
       [ 0.91397656,  0.47143899, -0.75406288,  0.09641519],
       [ 0.22501123,  0.36390658, -0.15387443, -0.54151067],
       [-0.4463253 ,  1.61762689, -0.92972615,  0.2992336 ]])

In [13]:
df2.to_numpy()  # expensive: the mixed column dtypes must all be converted into one object-dtype array

array([[1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo']],
      dtype=object)

In [14]:
# Count, mean, std, min, quartiles and max for each column.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,0.359311,0.853426,-0.19655,-0.009419
std,0.5059,1.497374,0.843098,0.469751
min,-0.446325,-1.659499,-0.929726,-0.541511
25%,0.222356,0.39079,-0.82516,-0.367946
50%,0.286414,1.044533,-0.453969,-0.043796
75%,0.757389,1.654511,0.162099,0.248529
max,0.913977,2.660277,1.2398,0.702617


In [15]:
# Transpose: the dates become columns and A-D become the rows.
df.T

Unnamed: 0,2013-01-01,2013-01-02,2013-01-03,2013-01-04,2013-01-05,2013-01-06
A,0.347818,0.221471,0.893913,0.913977,0.225011,-0.446325
B,1.666806,2.660277,-1.659499,0.471439,0.363907,1.617627
C,1.2398,0.267423,-0.848859,-0.754063,-0.153874,-0.929726
D,-0.184007,0.702617,-0.429259,0.096415,-0.541511,0.299234


In [16]:
# Sort along the column axis by label in descending order (D, C, B, A).
df.sort_index(axis=1, ascending=False)

Unnamed: 0,D,C,B,A
2013-01-01,-0.184007,1.2398,1.666806,0.347818
2013-01-02,0.702617,0.267423,2.660277,0.221471
2013-01-03,-0.429259,-0.848859,-1.659499,0.893913
2013-01-04,0.096415,-0.754063,0.471439,0.913977
2013-01-05,-0.541511,-0.153874,0.363907,0.225011
2013-01-06,0.299234,-0.929726,1.617627,-0.446325


In [17]:
# Order the rows by the values in column C, largest first.
df.sort_values(by="C", ascending=False)

Unnamed: 0,A,B,C,D
2013-01-01,0.347818,1.666806,1.2398,-0.184007
2013-01-02,0.221471,2.660277,0.267423,0.702617
2013-01-05,0.225011,0.363907,-0.153874,-0.541511
2013-01-04,0.913977,0.471439,-0.754063,0.096415
2013-01-03,0.893913,-1.659499,-0.848859,-0.429259
2013-01-06,-0.446325,1.617627,-0.929726,0.299234


### Selection

#### Getting

In [18]:
# Selecting a single column by name yields a Series.
df["A"]

2013-01-01    0.347818
2013-01-02    0.221471
2013-01-03    0.893913
2013-01-04    0.913977
2013-01-05    0.225011
2013-01-06   -0.446325
Freq: D, Name: A, dtype: float64

In [22]:
# Slicing with integers selects rows by position (here the first three).
df[0:3]

Unnamed: 0,A,B,C,D
2013-01-01,0.347818,1.666806,1.2398,-0.184007
2013-01-02,0.221471,2.660277,0.267423,0.702617
2013-01-03,0.893913,-1.659499,-0.848859,-0.429259


In [28]:
# Redisplay the full frame for reference.
df

Unnamed: 0,A,B,C,D
2013-01-01,0.347818,1.666806,1.2398,-0.184007
2013-01-02,0.221471,2.660277,0.267423,0.702617
2013-01-03,0.893913,-1.659499,-0.848859,-0.429259
2013-01-04,0.913977,0.471439,-0.754063,0.096415
2013-01-05,0.225011,0.363907,-0.153874,-0.541511
2013-01-06,-0.446325,1.617627,-0.929726,0.299234


In [21]:
# Slicing with date strings selects rows by label; both endpoints are included.
df["20130102":"20130103"]

Unnamed: 0,A,B,C,D
2013-01-02,0.221471,2.660277,0.267423,0.702617
2013-01-03,0.893913,-1.659499,-0.848859,-0.429259


#### Selection by label

In [29]:
# Row labelled with the first date, returned as a Series indexed by column name.
df.loc[dates[0]]

A    0.347818
B    1.666806
C    1.239800
D   -0.184007
Name: 2013-01-01 00:00:00, dtype: float64

In [32]:
# Redisplay the date index whose entries serve as row labels.
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [44]:
# Rows up to and including the third index label, restricted to columns A and B.
df.loc[:df.index[2], ["A", "B"]]

Unnamed: 0,A,B
2013-01-01,0.347818,1.666806
2013-01-02,0.221471,2.660277
2013-01-03,0.893913,-1.659499


In [45]:
# Label slices include both endpoints, so three rows are returned.
df.loc["20130102":"20130104", ["A", "B"]]

Unnamed: 0,A,B
2013-01-02,0.221471,2.660277
2013-01-03,0.893913,-1.659499
2013-01-04,0.913977,0.471439


In [47]:
# A single row label reduces the result to a Series over the chosen columns.
df.loc["20130102", ["A", "B"]]

A    0.221471
B    2.660277
Name: 2013-01-02 00:00:00, dtype: float64

In [48]:
# A row label plus a column label returns a single scalar.
df.loc[dates[0], "A"]

0.34781771235294845

In [49]:
# .at looks up one value from a (row label, column label) pair.
df.at[dates[0], "A"]

0.34781771235294845

#### Selection by position

In [50]:
# Fourth row by integer position (positions start at 0).
df.iloc[3]

A    0.913977
B    0.471439
C   -0.754063
D    0.096415
Name: 2013-01-04 00:00:00, dtype: float64

In [51]:
# Positional slices exclude the end point: rows 3-4 and columns 0-1.
df.iloc[3:5, 0:2]

Unnamed: 0,A,B
2013-01-04,0.913977,0.471439
2013-01-05,0.225011,0.363907


In [52]:
# Lists of integer positions pick arbitrary rows and columns.
df.iloc[[1, 2, 4], [0, 2]]

Unnamed: 0,A,C
2013-01-02,0.221471,0.267423
2013-01-03,0.893913,-0.848859
2013-01-05,0.225011,-0.153874


In [53]:
# Rows at positions 1-2, all columns.
df.iloc[1:3, :]

Unnamed: 0,A,B,C,D
2013-01-02,0.221471,2.660277,0.267423,0.702617
2013-01-03,0.893913,-1.659499,-0.848859,-0.429259


In [54]:
# All rows, columns at positions 1-2 (B and C).
df.iloc[:, 1:3]

Unnamed: 0,B,C
2013-01-01,1.666806,1.2398
2013-01-02,2.660277,0.267423
2013-01-03,-1.659499,-0.848859
2013-01-04,0.471439,-0.754063
2013-01-05,0.363907,-0.153874
2013-01-06,1.617627,-0.929726


In [55]:
# Scalar at row position 1, column position 1.
df.iloc[1, 1]

2.66027730844362

In [56]:

# .iat looks up one value from a (row position, column position) pair.
df.iat[1, 1]

2.66027730844362

#### Boolean indexing

In [57]:
# Keep only the rows where column A is positive.
df[df["A"] > 0]

Unnamed: 0,A,B,C,D
2013-01-01,0.347818,1.666806,1.2398,-0.184007
2013-01-02,0.221471,2.660277,0.267423,0.702617
2013-01-03,0.893913,-1.659499,-0.848859,-0.429259
2013-01-04,0.913977,0.471439,-0.754063,0.096415
2013-01-05,0.225011,0.363907,-0.153874,-0.541511


In [58]:
# Element-wise mask: the shape is kept and entries that are not > 0 come back as missing values.
df[df > 0]

Unnamed: 0,A,B,C,D
2013-01-01,0.347818,1.666806,1.2398,
2013-01-02,0.221471,2.660277,0.267423,0.702617
2013-01-03,0.893913,,,
2013-01-04,0.913977,0.471439,,0.096415
2013-01-05,0.225011,0.363907,,
2013-01-06,,1.617627,,0.299234


In [59]:
# Redisplay df: the boolean selections returned new objects and left it unmodified.
df

Unnamed: 0,A,B,C,D
2013-01-01,0.347818,1.666806,1.2398,-0.184007
2013-01-02,0.221471,2.660277,0.267423,0.702617
2013-01-03,0.893913,-1.659499,-0.848859,-0.429259
2013-01-04,0.913977,0.471439,-0.754063,0.096415
2013-01-05,0.225011,0.363907,-0.153874,-0.541511
2013-01-06,-0.446325,1.617627,-0.929726,0.299234


In [60]:
# Derive df3 from df without touching df itself: assign() returns a new frame
# that has the extra string column E appended.
df3 = df.assign(E=["one", "one", "two", "three", "four", "three"])
df3

Unnamed: 0,A,B,C,D,E
2013-01-01,0.347818,1.666806,1.2398,-0.184007,one
2013-01-02,0.221471,2.660277,0.267423,0.702617,one
2013-01-03,0.893913,-1.659499,-0.848859,-0.429259,two
2013-01-04,0.913977,0.471439,-0.754063,0.096415,three
2013-01-05,0.225011,0.363907,-0.153874,-0.541511,four
2013-01-06,-0.446325,1.617627,-0.929726,0.299234,three


In [61]:
# Keep the rows whose E value is one of the listed labels.
df3[df3["E"].isin(["two", "four"])]

Unnamed: 0,A,B,C,D,E
2013-01-03,0.893913,-1.659499,-0.848859,-0.429259,two
2013-01-05,0.225011,0.363907,-0.153874,-0.541511,four


#### Setting

In [66]:
# Seven integers labelled with consecutive days starting on 2013-01-02.
s1 = pd.Series(
    data=[1, 2, 3, 4, 5, 6, 7],
    index=pd.date_range(start="20130102", periods=7),
)

In [67]:
# Show the new Series together with its daily date index.
s1

2013-01-02    1
2013-01-03    2
2013-01-04    3
2013-01-05    4
2013-01-06    5
2013-01-07    6
2013-01-08    7
Freq: D, dtype: int64

In [69]:
# Set a single value by label: first date, column A. This modifies df in place.
df.at[dates[0], "A"] = 0

In [70]:
# Column A of the first row now reads 0.0.
df

Unnamed: 0,A,B,C,D
2013-01-01,0.0,1.666806,1.2398,-0.184007
2013-01-02,0.221471,2.660277,0.267423,0.702617
2013-01-03,0.893913,-1.659499,-0.848859,-0.429259
2013-01-04,0.913977,0.471439,-0.754063,0.096415
2013-01-05,0.225011,0.363907,-0.153874,-0.541511
2013-01-06,-0.446325,1.617627,-0.929726,0.299234


In [71]:
# Set a single value by position: first row, second column (B). This modifies df in place.
df.iat[0, 1] = 0

In [72]:
# Column B of the first row now reads 0.0 as well.
df

Unnamed: 0,A,B,C,D
2013-01-01,0.0,0.0,1.2398,-0.184007
2013-01-02,0.221471,2.660277,0.267423,0.702617
2013-01-03,0.893913,-1.659499,-0.848859,-0.429259
2013-01-04,0.913977,0.471439,-0.754063,0.096415
2013-01-05,0.225011,0.363907,-0.153874,-0.541511
2013-01-06,-0.446325,1.617627,-0.929726,0.299234


In [73]:
# Overwrite every row of column D with 5, using an array as long as the frame.
df.loc[:, "D"] = np.array([5] * len(df))

In [74]:
# Column D now holds 5 in every row.
df

Unnamed: 0,A,B,C,D
2013-01-01,0.0,0.0,1.2398,5
2013-01-02,0.221471,2.660277,0.267423,5
2013-01-03,0.893913,-1.659499,-0.848859,5
2013-01-04,0.913977,0.471439,-0.754063,5
2013-01-05,0.225011,0.363907,-0.153874,5
2013-01-06,-0.446325,1.617627,-0.929726,5


In [75]:
# Number of rows in the frame.
len(df)

6

In [77]:
# (number of rows, number of columns).
df.shape

(6, 4)

In [78]:
# df3 was built from a copy of df, so the assignments made to df since then do not show up here.
df3

Unnamed: 0,A,B,C,D,E
2013-01-01,0.347818,1.666806,1.2398,-0.184007,one
2013-01-02,0.221471,2.660277,0.267423,0.702617,one
2013-01-03,0.893913,-1.659499,-0.848859,-0.429259,two
2013-01-04,0.913977,0.471439,-0.754063,0.096415,three
2013-01-05,0.225011,0.363907,-0.153874,-0.541511,four
2013-01-06,-0.446325,1.617627,-0.929726,0.299234,three


In [81]:
# Remove the helper column E again.
# The result is rebound instead of using inplace=True, and errors="ignore" makes
# the cell safe to re-run: once E is gone a second execution is a no-op rather
# than a KeyError.
df3 = df3.drop(columns="E", errors="ignore")
df3

Unnamed: 0,A,B,C,D
2013-01-01,0.347818,1.666806,1.2398,-0.184007
2013-01-02,0.221471,2.660277,0.267423,0.702617
2013-01-03,0.893913,-1.659499,-0.848859,-0.429259
2013-01-04,0.913977,0.471439,-0.754063,0.096415
2013-01-05,0.225011,0.363907,-0.153874,-0.541511
2013-01-06,-0.446325,1.617627,-0.929726,0.299234


In [92]:
# Flip the sign of every positive entry, leaving no value in df3 above zero.
df3[df3 > 0] = -df3
df3

Unnamed: 0,A,B,C,D
2013-01-01,-0.347818,-1.666806,-1.2398,-0.184007
2013-01-02,-0.221471,-2.660277,-0.267423,-0.702617
2013-01-03,-0.893913,-1.659499,-0.848859,-0.429259
2013-01-04,-0.913977,-0.471439,-0.754063,-0.096415
2013-01-05,-0.225011,-0.363907,-0.153874,-0.541511
2013-01-06,-0.446325,-1.617627,-0.929726,-0.299234


In [93]:
# Flip the sign of every negative entry, leaving no value in df3 below zero.
df3[df3 < 0] = -df3
df3

Unnamed: 0,A,B,C,D
2013-01-01,0.347818,1.666806,1.2398,0.184007
2013-01-02,0.221471,2.660277,0.267423,0.702617
2013-01-03,0.893913,1.659499,0.848859,0.429259
2013-01-04,0.913977,0.471439,0.754063,0.096415
2013-01-05,0.225011,0.363907,0.153874,0.541511
2013-01-06,0.446325,1.617627,0.929726,0.299234
