In [1]:
import numpy as np
import pandas as pd

## Object creation

In [2]:
# Series: a one-dimensional labelled array. No index is passed, so pandas
# assigns the default integer index 0..5; the np.nan entry makes the dtype float64.
s = pd.Series([1, 2, 3, np.nan, 6, 8])
s

0    1.0
1    2.0
2    3.0
3    NaN
4    6.0
5    8.0
dtype: float64

### DataFrame from NumPy array

In [3]:
# Seven consecutive calendar days beginning 2012-08-30 (daily frequency is the default).
dates = pd.date_range(start="20120830", periods=7)

In [4]:
# Display the DatetimeIndex built in the previous cell.
dates

DatetimeIndex(['2012-08-30', '2012-08-31', '2012-09-01', '2012-09-02',
               '2012-09-03', '2012-09-04', '2012-09-05'],
              dtype='datetime64[ns]', freq='D')

In [5]:
# Build a 7x4 frame of standard-normal samples, indexed by `dates`, columns A-D.
# A seeded Generator makes Restart & Run All reproduce the same values; the
# earlier unseeded np.random.randn call gave different numbers on every run.
# NOTE: outputs saved before this change came from an unseeded run and will be
# replaced the next time the notebook is executed.
rng = np.random.default_rng(seed=42)
df = pd.DataFrame(rng.standard_normal((7, 4)), index=dates, columns=list("ABCD"))
df

Unnamed: 0,A,B,C,D
2012-08-30,0.244408,-0.215245,0.197102,0.833892
2012-08-31,-2.271624,-0.427152,1.00871,-0.066478
2012-09-01,-0.919489,0.477583,0.3138,-0.305103
2012-09-02,-0.496799,-0.910658,-0.399775,-0.290959
2012-09-03,-1.831434,-0.339916,-0.104089,1.161046
2012-09-04,0.420831,2.1333,0.231224,-0.384533
2012-09-05,0.01325,1.011428,0.556429,-0.696156


## Viewing data

In [6]:
# First five rows (head() defaults to n=5).
df.head()

Unnamed: 0,A,B,C,D
2012-08-30,0.244408,-0.215245,0.197102,0.833892
2012-08-31,-2.271624,-0.427152,1.00871,-0.066478
2012-09-01,-0.919489,0.477583,0.3138,-0.305103
2012-09-02,-0.496799,-0.910658,-0.399775,-0.290959
2012-09-03,-1.831434,-0.339916,-0.104089,1.161046


In [7]:
# Last three rows.
df.tail(3)

Unnamed: 0,A,B,C,D
2012-09-03,-1.831434,-0.339916,-0.104089,1.161046
2012-09-04,0.420831,2.1333,0.231224,-0.384533
2012-09-05,0.01325,1.011428,0.556429,-0.696156


In [8]:
# Row labels: the DatetimeIndex the frame was built with.
df.index

DatetimeIndex(['2012-08-30', '2012-08-31', '2012-09-01', '2012-09-02',
               '2012-09-03', '2012-09-04', '2012-09-05'],
              dtype='datetime64[ns]', freq='D')

In [9]:
# Column labels.
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [10]:
# Underlying data as a plain NumPy array; the index and column labels are not part of it.
df.to_numpy()

array([[ 0.24440754, -0.21524546,  0.19710176,  0.83389188],
       [-2.27162422, -0.42715168,  1.00870988, -0.06647802],
       [-0.91948938,  0.47758343,  0.31379995, -0.30510297],
       [-0.49679937, -0.91065796, -0.39977525, -0.29095947],
       [-1.83143365, -0.33991562, -0.10408949,  1.16104558],
       [ 0.4208309 ,  2.13330006,  0.2312238 , -0.38453303],
       [ 0.0132501 ,  1.01142789,  0.55642865, -0.696156  ]])

In [11]:
# Summary statistics (count, mean, std, min, quartiles, max) for each numeric column.
df.describe()

Unnamed: 0,A,B,C,D
count,7.0,7.0,7.0,7.0
mean,-0.691551,0.247049,0.257628,0.035958
std,1.040513,1.044704,0.451653,0.689083
min,-2.271624,-0.910658,-0.399775,-0.696156
25%,-1.375462,-0.383534,0.046506,-0.344818
50%,-0.496799,-0.215245,0.231224,-0.290959
75%,0.128829,0.744506,0.435114,0.383707
max,0.420831,2.1333,1.00871,1.161046


In [12]:
# Transpose: columns A-D become rows and the dates become column labels.
df.T

Unnamed: 0,2012-08-30,2012-08-31,2012-09-01,2012-09-02,2012-09-03,2012-09-04,2012-09-05
A,0.244408,-2.271624,-0.919489,-0.496799,-1.831434,0.420831,0.01325
B,-0.215245,-0.427152,0.477583,-0.910658,-0.339916,2.1333,1.011428
C,0.197102,1.00871,0.3138,-0.399775,-0.104089,0.231224,0.556429
D,0.833892,-0.066478,-0.305103,-0.290959,1.161046,-0.384533,-0.696156


In [13]:
# Sort rows by index label (ascending by default). The index is already in
# date order, so the result matches df; pass ascending=False to see a change.
df.sort_index()

Unnamed: 0,A,B,C,D
2012-08-30,0.244408,-0.215245,0.197102,0.833892
2012-08-31,-2.271624,-0.427152,1.00871,-0.066478
2012-09-01,-0.919489,0.477583,0.3138,-0.305103
2012-09-02,-0.496799,-0.910658,-0.399775,-0.290959
2012-09-03,-1.831434,-0.339916,-0.104089,1.161046
2012-09-04,0.420831,2.1333,0.231224,-0.384533
2012-09-05,0.01325,1.011428,0.556429,-0.696156


In [18]:
# Rows ordered by the values in column B (ascending). A new frame is returned;
# df itself keeps its original row order.
df.sort_values(by='B')

Unnamed: 0,A,B,C,D
2012-09-02,-0.496799,-0.910658,-0.399775,-0.290959
2012-08-31,-2.271624,-0.427152,1.00871,-0.066478
2012-09-03,-1.831434,-0.339916,-0.104089,1.161046
2012-08-30,0.244408,-0.215245,0.197102,0.833892
2012-09-01,-0.919489,0.477583,0.3138,-0.305103
2012-09-05,0.01325,1.011428,0.556429,-0.696156
2012-09-04,0.420831,2.1333,0.231224,-0.384533


## Selection

### Getting

In [20]:
# Select a single column; the result is a Series named "A".
df['A']

2012-08-30    0.244408
2012-08-31   -2.271624
2012-09-01   -0.919489
2012-09-02   -0.496799
2012-09-03   -1.831434
2012-09-04    0.420831
2012-09-05    0.013250
Freq: D, Name: A, dtype: float64

In [21]:
# Slicing with [] selects rows. With date-string labels both endpoints are included.
df['20120830':'20120902']

Unnamed: 0,A,B,C,D
2012-08-30,0.244408,-0.215245,0.197102,0.833892
2012-08-31,-2.271624,-0.427152,1.00871,-0.066478
2012-09-01,-0.919489,0.477583,0.3138,-0.305103
2012-09-02,-0.496799,-0.910658,-0.399775,-0.290959


### Selection by label

In [22]:
# One row by label, returned as a Series indexed by column name.
df.loc[dates[0]]

A    0.244408
B   -0.215245
C    0.197102
D    0.833892
Name: 2012-08-30 00:00:00, dtype: float64

In [23]:
# All rows, columns A and B, selected by label.
df.loc[:, ['A', 'B']]

Unnamed: 0,A,B
2012-08-30,0.244408,-0.215245
2012-08-31,-2.271624,-0.427152
2012-09-01,-0.919489,0.477583
2012-09-02,-0.496799,-0.910658
2012-09-03,-1.831434,-0.339916
2012-09-04,0.420831,2.1333
2012-09-05,0.01325,1.011428


In [24]:
# Label slice on rows plus a column list; the end label (2012-09-03) is included.
df.loc["20120830":"20120903", ["A", "B"]]

Unnamed: 0,A,B
2012-08-30,0.244408,-0.215245
2012-08-31,-2.271624,-0.427152
2012-09-01,-0.919489,0.477583
2012-09-02,-0.496799,-0.910658
2012-09-03,-1.831434,-0.339916


In [25]:
# Scalar lookup by row label and column label.
df.loc[dates[0], "A"]

0.24440754054021702

In [26]:
# Same scalar via .at, the accessor dedicated to single-value label lookups.
df.at[dates[0], "A"]

0.24440754054021702

In [27]:
# Position-based selection starts here: .iloc takes integer positions, not labels.
# This is the row at position 3 (the fourth row).
df.iloc[3]

A   -0.496799
B   -0.910658
C   -0.399775
D   -0.290959
Name: 2012-09-02 00:00:00, dtype: float64

In [28]:
# Rows at positions 3-4 and columns at positions 0-1; the slice end is exclusive,
# unlike the label slices used with .loc.
df.iloc[3:5, 0:2]

Unnamed: 0,A,B
2012-09-02,-0.496799,-0.910658
2012-09-03,-1.831434,-0.339916


In [29]:
# Explicit lists of row positions and column positions.
df.iloc[[1, 2, 4], [0, 2]]

Unnamed: 0,A,C
2012-08-31,-2.271624,1.00871
2012-09-01,-0.919489,0.3138
2012-09-03,-1.831434,-0.104089


In [30]:
# Row slice by position, all columns.
df.iloc[1:3, :]

Unnamed: 0,A,B,C,D
2012-08-31,-2.271624,-0.427152,1.00871,-0.066478
2012-09-01,-0.919489,0.477583,0.3138,-0.305103


In [31]:
# Column slice by position, all rows.
df.iloc[:, 1:3]

Unnamed: 0,B,C
2012-08-30,-0.215245,0.197102
2012-08-31,-0.427152,1.00871
2012-09-01,0.477583,0.3138
2012-09-02,-0.910658,-0.399775
2012-09-03,-0.339916,-0.104089
2012-09-04,2.1333,0.231224
2012-09-05,1.011428,0.556429


In [32]:
# Scalar by integer position (row 1, column 1).
df.iloc[1, 1]

-0.4271516823993775

In [33]:
# Same scalar via .iat, the single-value positional accessor.
df.iat[1, 1]

-0.4271516823993775

### Boolean indexing

In [34]:
# Keep only the rows where column A is positive.
df[df["A"] > 0]

Unnamed: 0,A,B,C,D
2012-08-30,0.244408,-0.215245,0.197102,0.833892
2012-09-04,0.420831,2.1333,0.231224,-0.384533
2012-09-05,0.01325,1.011428,0.556429,-0.696156


In [35]:
# Element-wise mask: the shape is preserved and entries that fail the condition become NaN.
df[df > 0]

Unnamed: 0,A,B,C,D
2012-08-30,0.244408,,0.197102,0.833892
2012-08-31,,,1.00871,
2012-09-01,,0.477583,0.3138,
2012-09-02,,,,
2012-09-03,,,,1.161046
2012-09-04,0.420831,2.1333,0.231224,
2012-09-05,0.01325,1.011428,0.556429,


In [37]:
# Derive a separate frame with an extra string column "E". `assign` returns a
# new DataFrame, so `df` itself is left untouched.
df2 = df.assign(E=["one", "one", "two", "three", "four", "three", "five"])
df2

Unnamed: 0,A,B,C,D,E
2012-08-30,0.244408,-0.215245,0.197102,0.833892,one
2012-08-31,-2.271624,-0.427152,1.00871,-0.066478,one
2012-09-01,-0.919489,0.477583,0.3138,-0.305103,two
2012-09-02,-0.496799,-0.910658,-0.399775,-0.290959,three
2012-09-03,-1.831434,-0.339916,-0.104089,1.161046,four
2012-09-04,0.420831,2.1333,0.231224,-0.384533,three
2012-09-05,0.01325,1.011428,0.556429,-0.696156,five
