# Object Creation

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Create a Series by passing a list of values and letting pandas
# generate a default integer index. The list includes np.nan, and the
# resulting Series is displayed with dtype float64.
s = pd.Series([1, 3, 5, np.nan, 6, 8])
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

Creating a DataFrame by passing a NumPy array, with a datetime index and labeled columns:

In [3]:
dates = pd.date_range('20130101', periods=6)
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [4]:
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD'))
df

Unnamed: 0,A,B,C,D
2013-01-01,0.341992,2.112777,0.352103,-0.119959
2013-01-02,-0.243472,-0.587776,0.138052,-0.553029
2013-01-03,-0.424797,-1.161806,-0.741627,-1.034188
2013-01-04,0.144842,-0.274216,0.309229,-1.739088
2013-01-05,1.368244,0.911509,-0.212801,0.109769
2013-01-06,0.21122,0.356671,-0.061084,1.070357


Creating a DataFrame by passing a dict of objects that can each be converted to a series-like structure:

In [6]:
# Build a DataFrame from a dict mapping column labels to column values.
# Scalars ('A', 'B', 'F') are repeated for every row; the length-4
# Series / array / Categorical values determine the four rows, and each
# column keeps its own dtype.
df2 = pd.DataFrame({'A': 1.,
                    'B': pd.Timestamp('20130102'),
                    'C': pd.Series(1, index=list(range(4)), dtype='float32'),
                    'D': np.array([3] * 4, dtype='int32'),
                    'E': pd.Categorical(["test", "train", "test", "train"]),
                    'F': 'foo'})
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [7]:
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

# Viewing Data

In [8]:
df.head()

Unnamed: 0,A,B,C,D
2013-01-01,0.341992,2.112777,0.352103,-0.119959
2013-01-02,-0.243472,-0.587776,0.138052,-0.553029
2013-01-03,-0.424797,-1.161806,-0.741627,-1.034188
2013-01-04,0.144842,-0.274216,0.309229,-1.739088
2013-01-05,1.368244,0.911509,-0.212801,0.109769


In [9]:
df.tail(3)

Unnamed: 0,A,B,C,D
2013-01-04,0.144842,-0.274216,0.309229,-1.739088
2013-01-05,1.368244,0.911509,-0.212801,0.109769
2013-01-06,0.21122,0.356671,-0.061084,1.070357


In [10]:
df.index

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [11]:
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

DataFrame.to_numpy() gives a NumPy representation of the underlying data. Note that this can be an expensive operation when your DataFrame has columns with different data types, which comes down to a fundamental difference between pandas and NumPy: NumPy arrays have one dtype for the entire array, while pandas DataFrames have one dtype per column. When you call DataFrame.to_numpy(), pandas will find the NumPy dtype that can hold all of the dtypes in the DataFrame. This may end up being object, which requires casting every value to a Python object.

For df, our DataFrame of all floating-point values, DataFrame.to_numpy() is fast and doesn’t require copying data.

In [12]:
df.to_numpy()

array([[ 0.34199178,  2.11277737,  0.35210332, -0.11995924],
       [-0.24347228, -0.58777556,  0.13805225, -0.5530286 ],
       [-0.42479686, -1.1618063 , -0.74162743, -1.03418835],
       [ 0.14484173, -0.27421631,  0.30922856, -1.73908786],
       [ 1.36824388,  0.91150877, -0.21280142,  0.1097691 ],
       [ 0.21121982,  0.35667075, -0.0610841 ,  1.070357  ]])

For df2, the DataFrame with multiple dtypes, DataFrame.to_numpy() is relatively expensive.

In [13]:
df2.to_numpy()

array([[1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo']],
      dtype=object)

In [14]:
# describe() shows a quick statistical summary of the data: count, mean,
# std, min, the 25%/50%/75% quantiles and max for each column.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,0.233005,0.226193,-0.036021,-0.37769
std,0.627475,1.173489,0.407406,0.971187
min,-0.424797,-1.161806,-0.741627,-1.739088
25%,-0.146394,-0.509386,-0.174872,-0.913898
50%,0.178031,0.041227,0.038484,-0.336494
75%,0.309299,0.772799,0.266434,0.052337
max,1.368244,2.112777,0.352103,1.070357


In [15]:
df.T

Unnamed: 0,2013-01-01 00:00:00,2013-01-02 00:00:00,2013-01-03 00:00:00,2013-01-04 00:00:00,2013-01-05 00:00:00,2013-01-06 00:00:00
A,0.341992,-0.243472,-0.424797,0.144842,1.368244,0.21122
B,2.112777,-0.587776,-1.161806,-0.274216,0.911509,0.356671
C,0.352103,0.138052,-0.741627,0.309229,-0.212801,-0.061084
D,-0.119959,-0.553029,-1.034188,-1.739088,0.109769,1.070357


In [16]:
df.sort_index(axis=1, ascending=False)

Unnamed: 0,D,C,B,A
2013-01-01,-0.119959,0.352103,2.112777,0.341992
2013-01-02,-0.553029,0.138052,-0.587776,-0.243472
2013-01-03,-1.034188,-0.741627,-1.161806,-0.424797
2013-01-04,-1.739088,0.309229,-0.274216,0.144842
2013-01-05,0.109769,-0.212801,0.911509,1.368244
2013-01-06,1.070357,-0.061084,0.356671,0.21122


In [17]:
df.sort_values(by='B')

Unnamed: 0,A,B,C,D
2013-01-03,-0.424797,-1.161806,-0.741627,-1.034188
2013-01-02,-0.243472,-0.587776,0.138052,-0.553029
2013-01-04,0.144842,-0.274216,0.309229,-1.739088
2013-01-06,0.21122,0.356671,-0.061084,1.070357
2013-01-05,1.368244,0.911509,-0.212801,0.109769
2013-01-01,0.341992,2.112777,0.352103,-0.119959


In [18]:
# Getting: selecting a single column by its label returns a Series that
# keeps the frame's date index and is named after the column ('A').
df['A']

2013-01-01    0.341992
2013-01-02   -0.243472
2013-01-03   -0.424797
2013-01-04    0.144842
2013-01-05    1.368244
2013-01-06    0.211220
Freq: D, Name: A, dtype: float64

In [19]:
df[0:3]

Unnamed: 0,A,B,C,D
2013-01-01,0.341992,2.112777,0.352103,-0.119959
2013-01-02,-0.243472,-0.587776,0.138052,-0.553029
2013-01-03,-0.424797,-1.161806,-0.741627,-1.034188


In [20]:
df['20130102':'20130104']

Unnamed: 0,A,B,C,D
2013-01-02,-0.243472,-0.587776,0.138052,-0.553029
2013-01-03,-0.424797,-1.161806,-0.741627,-1.034188
2013-01-04,0.144842,-0.274216,0.309229,-1.739088


In [21]:
df.loc[dates[0]]

A    0.341992
B    2.112777
C    0.352103
D   -0.119959
Name: 2013-01-01 00:00:00, dtype: float64

In [22]:
df.loc[:, ['A', 'B']]

Unnamed: 0,A,B
2013-01-01,0.341992,2.112777
2013-01-02,-0.243472,-0.587776
2013-01-03,-0.424797,-1.161806
2013-01-04,0.144842,-0.274216
2013-01-05,1.368244,0.911509
2013-01-06,0.21122,0.356671


In [23]:
df.loc['20130102':'20130104', ['A', 'B']]

Unnamed: 0,A,B
2013-01-02,-0.243472,-0.587776
2013-01-03,-0.424797,-1.161806
2013-01-04,0.144842,-0.274216


In [24]:
df.loc['20130102', ['A', 'B']]

A   -0.243472
B   -0.587776
Name: 2013-01-02 00:00:00, dtype: float64

In [25]:
df.loc[dates[0], 'A']

0.34199178183408513

In [26]:
df.iloc[3]

A    0.144842
B   -0.274216
C    0.309229
D   -1.739088
Name: 2013-01-04 00:00:00, dtype: float64

In [27]:
df.iloc[3:5, 0:2]

Unnamed: 0,A,B
2013-01-04,0.144842,-0.274216
2013-01-05,1.368244,0.911509


In [28]:
df.iloc[[1, 2, 4], [0, 2]]

Unnamed: 0,A,C
2013-01-02,-0.243472,0.138052
2013-01-03,-0.424797,-0.741627
2013-01-05,1.368244,-0.212801


In [29]:
df.iloc[1:3, :]

Unnamed: 0,A,B,C,D
2013-01-02,-0.243472,-0.587776,0.138052,-0.553029
2013-01-03,-0.424797,-1.161806,-0.741627,-1.034188


In [30]:
df.iloc[:, 1:3]

Unnamed: 0,B,C
2013-01-01,2.112777,0.352103
2013-01-02,-0.587776,0.138052
2013-01-03,-1.161806,-0.741627
2013-01-04,-0.274216,0.309229
2013-01-05,0.911509,-0.212801
2013-01-06,0.356671,-0.061084


In [31]:
df.iloc[1, 1]

-0.5877755573941384

In [32]:
df.iat[1, 1]

-0.5877755573941384

In [33]:
df

Unnamed: 0,A,B,C,D
2013-01-01,0.341992,2.112777,0.352103,-0.119959
2013-01-02,-0.243472,-0.587776,0.138052,-0.553029
2013-01-03,-0.424797,-1.161806,-0.741627,-1.034188
2013-01-04,0.144842,-0.274216,0.309229,-1.739088
2013-01-05,1.368244,0.911509,-0.212801,0.109769
2013-01-06,0.21122,0.356671,-0.061084,1.070357


In [34]:
df[df.A > 0]

Unnamed: 0,A,B,C,D
2013-01-01,0.341992,2.112777,0.352103,-0.119959
2013-01-04,0.144842,-0.274216,0.309229,-1.739088
2013-01-05,1.368244,0.911509,-0.212801,0.109769
2013-01-06,0.21122,0.356671,-0.061084,1.070357


In [35]:
df[df > 0]

Unnamed: 0,A,B,C,D
2013-01-01,0.341992,2.112777,0.352103,
2013-01-02,,,0.138052,
2013-01-03,,,,
2013-01-04,0.144842,,0.309229,
2013-01-05,1.368244,0.911509,,0.109769
2013-01-06,0.21122,0.356671,,1.070357


In [36]:
df2 = df.copy()
df2['E'] = ['one', 'one', 'two', 'three', 'four', 'three']
df2

Unnamed: 0,A,B,C,D,E
2013-01-01,0.341992,2.112777,0.352103,-0.119959,one
2013-01-02,-0.243472,-0.587776,0.138052,-0.553029,one
2013-01-03,-0.424797,-1.161806,-0.741627,-1.034188,two
2013-01-04,0.144842,-0.274216,0.309229,-1.739088,three
2013-01-05,1.368244,0.911509,-0.212801,0.109769,four
2013-01-06,0.21122,0.356671,-0.061084,1.070357,three


In [37]:
df2[df2['E'].isin(['two', 'four'])]

Unnamed: 0,A,B,C,D,E
2013-01-03,-0.424797,-1.161806,-0.741627,-1.034188,two
2013-01-05,1.368244,0.911509,-0.212801,0.109769,four


In [39]:
s1 = pd.Series([1, 2, 3, 4, 5, 6], index=pd.date_range('20130102', periods=6))
s1

2013-01-02    1
2013-01-03    2
2013-01-04    3
2013-01-05    4
2013-01-06    5
2013-01-07    6
Freq: D, dtype: int64

In [41]:
df['F'] = s1
df

Unnamed: 0,A,B,C,D,F
2013-01-01,0.341992,2.112777,0.352103,-0.119959,
2013-01-02,-0.243472,-0.587776,0.138052,-0.553029,1.0
2013-01-03,-0.424797,-1.161806,-0.741627,-1.034188,2.0
2013-01-04,0.144842,-0.274216,0.309229,-1.739088,3.0
2013-01-05,1.368244,0.911509,-0.212801,0.109769,4.0
2013-01-06,0.21122,0.356671,-0.061084,1.070357,5.0


In [43]:
df.at[dates[0], 'A'] = 0
df

Unnamed: 0,A,B,C,D,F
2013-01-01,0.0,2.112777,0.352103,-0.119959,
2013-01-02,-0.243472,-0.587776,0.138052,-0.553029,1.0
2013-01-03,-0.424797,-1.161806,-0.741627,-1.034188,2.0
2013-01-04,0.144842,-0.274216,0.309229,-1.739088,3.0
2013-01-05,1.368244,0.911509,-0.212801,0.109769,4.0
2013-01-06,0.21122,0.356671,-0.061084,1.070357,5.0


In [44]:
df.iat[0, 1] = 0
df

Unnamed: 0,A,B,C,D,F
2013-01-01,0.0,0.0,0.352103,-0.119959,
2013-01-02,-0.243472,-0.587776,0.138052,-0.553029,1.0
2013-01-03,-0.424797,-1.161806,-0.741627,-1.034188,2.0
2013-01-04,0.144842,-0.274216,0.309229,-1.739088,3.0
2013-01-05,1.368244,0.911509,-0.212801,0.109769,4.0
2013-01-06,0.21122,0.356671,-0.061084,1.070357,5.0


In [45]:
df.loc[:, 'D'] = np.array([5] * len(df))
df

Unnamed: 0,A,B,C,D,F
2013-01-01,0.0,0.0,0.352103,5,
2013-01-02,-0.243472,-0.587776,0.138052,5,1.0
2013-01-03,-0.424797,-1.161806,-0.741627,5,2.0
2013-01-04,0.144842,-0.274216,0.309229,5,3.0
2013-01-05,1.368244,0.911509,-0.212801,5,4.0
2013-01-06,0.21122,0.356671,-0.061084,5,5.0


In [46]:
df2 = df.copy()
df2[df2 > 0] = -df2
df2

Unnamed: 0,A,B,C,D,F
2013-01-01,0.0,0.0,-0.352103,-5,
2013-01-02,-0.243472,-0.587776,-0.138052,-5,-1.0
2013-01-03,-0.424797,-1.161806,-0.741627,-5,-2.0
2013-01-04,-0.144842,-0.274216,-0.309229,-5,-3.0
2013-01-05,-1.368244,-0.911509,-0.212801,-5,-4.0
2013-01-06,-0.21122,-0.356671,-0.061084,-5,-5.0


In [47]:
df1 = df.reindex(index=dates[0:4], columns=list(df.columns) + ['E'])
df1.loc[dates[0]:dates[1], 'E'] = 1
df1

Unnamed: 0,A,B,C,D,F,E
2013-01-01,0.0,0.0,0.352103,5,,1.0
2013-01-02,-0.243472,-0.587776,0.138052,5,1.0,1.0
2013-01-03,-0.424797,-1.161806,-0.741627,5,2.0,
2013-01-04,0.144842,-0.274216,0.309229,5,3.0,


In [48]:
df1.dropna(how='any')

Unnamed: 0,A,B,C,D,F,E
2013-01-02,-0.243472,-0.587776,0.138052,5,1.0,1.0


In [49]:
df1.fillna(value=5)

Unnamed: 0,A,B,C,D,F,E
2013-01-01,0.0,0.0,0.352103,5,5.0,1.0
2013-01-02,-0.243472,-0.587776,0.138052,5,1.0,1.0
2013-01-03,-0.424797,-1.161806,-0.741627,5,2.0,5.0
2013-01-04,0.144842,-0.274216,0.309229,5,3.0,5.0


In [50]:
df1.fillna(value=5)

Unnamed: 0,A,B,C,D,F,E
2013-01-01,0.0,0.0,0.352103,5,5.0,1.0
2013-01-02,-0.243472,-0.587776,0.138052,5,1.0,1.0
2013-01-03,-0.424797,-1.161806,-0.741627,5,2.0,5.0
2013-01-04,0.144842,-0.274216,0.309229,5,3.0,5.0


In [51]:
pd.isna(df1)

Unnamed: 0,A,B,C,D,F,E
2013-01-01,False,False,False,False,True,False
2013-01-02,False,False,False,False,False,False
2013-01-03,False,False,False,False,False,True
2013-01-04,False,False,False,False,False,True


In [52]:
df.mean(1)

2013-01-01    1.338026
2013-01-02    1.061361
2013-01-03    0.934354
2013-01-04    1.635971
2013-01-05    2.213390
2013-01-06    2.101361
Freq: D, dtype: float64

In [54]:
s = pd.Series([1, 3, 5, np.nan, 6, 8], index=dates).shift(2)
s

2013-01-01    NaN
2013-01-02    NaN
2013-01-03    1.0
2013-01-04    3.0
2013-01-05    5.0
2013-01-06    NaN
Freq: D, dtype: float64

In [55]:
df.sub(s, axis='index')

Unnamed: 0,A,B,C,D,F
2013-01-01,,,,,
2013-01-02,,,,,
2013-01-03,-1.424797,-2.161806,-1.741627,4.0,1.0
2013-01-04,-2.855158,-3.274216,-2.690771,2.0,0.0
2013-01-05,-3.631756,-4.088491,-5.212801,0.0,-1.0
2013-01-06,,,,,


In [56]:
df.apply(np.cumsum)

Unnamed: 0,A,B,C,D,F
2013-01-01,0.0,0.0,0.352103,5,
2013-01-02,-0.243472,-0.587776,0.490156,10,1.0
2013-01-03,-0.668269,-1.749582,-0.251472,15,3.0
2013-01-04,-0.523427,-2.023798,0.057757,20,6.0
2013-01-05,0.844816,-1.112289,-0.155045,25,10.0
2013-01-06,1.056036,-0.755619,-0.216129,30,15.0


In [57]:
df.apply(lambda x: x.max() - x.min())

A    1.793041
B    2.073315
C    1.093731
D    0.000000
F    4.000000
dtype: float64

In [58]:
s = pd.Series(np.random.randint(0, 7, size=10))

In [59]:
s

0    0
1    6
2    4
3    5
4    6
5    3
6    3
7    5
8    2
9    3
dtype: int32

In [60]:
s.value_counts()

3    3
6    2
5    2
4    1
2    1
0    1
dtype: int64

In [61]:
s = pd.Series(['A', 'B', 'C', 'Aaba', 'Baca', np.nan, 'CABA', 'dog', 'cat'])
s.str.lower()

0       a
1       b
2       c
3    aaba
4    baca
5     NaN
6    caba
7     dog
8     cat
dtype: object

In [62]:
df = pd.DataFrame(np.random.randn(10, 4))
df

Unnamed: 0,0,1,2,3
0,0.24361,0.380866,1.708363,-0.805376
1,-1.264247,0.543656,0.917021,-1.105667
2,-0.880562,-0.614802,-0.142941,-0.131354
3,-0.438816,-0.352065,0.453188,-0.002181
4,-1.021318,0.4184,-0.05295,0.806071
5,0.452448,-0.192144,0.2429,-0.567269
6,1.681793,1.745614,-0.356137,-1.833034
7,1.138758,-0.348099,0.206345,-0.321107
8,0.5456,-0.916702,-0.062658,1.472019
9,-0.371293,-0.055561,-0.49737,-0.796101


In [63]:
pieces = [df[:3], df[3:7], df[7:]]
pd.concat(pieces)

Unnamed: 0,0,1,2,3
0,0.24361,0.380866,1.708363,-0.805376
1,-1.264247,0.543656,0.917021,-1.105667
2,-0.880562,-0.614802,-0.142941,-0.131354
3,-0.438816,-0.352065,0.453188,-0.002181
4,-1.021318,0.4184,-0.05295,0.806071
5,0.452448,-0.192144,0.2429,-0.567269
6,1.681793,1.745614,-0.356137,-1.833034
7,1.138758,-0.348099,0.206345,-0.321107
8,0.5456,-0.916702,-0.062658,1.472019
9,-0.371293,-0.055561,-0.49737,-0.796101


In [64]:
left = pd.DataFrame({'key': ['foo', 'foo'], 'lval': [1, 2]})
right = pd.DataFrame({'key': ['foo', 'foo'], 'rval': [4, 5]})
left

Unnamed: 0,key,lval
0,foo,1
1,foo,2


In [65]:
right

Unnamed: 0,key,rval
0,foo,4
1,foo,5


In [66]:
pd.merge(left, right, on='key')

Unnamed: 0,key,lval,rval
0,foo,1,4
1,foo,1,5
2,foo,2,4
3,foo,2,5


In [67]:
left = pd.DataFrame({'key': ['foo', 'bar'], 'lval': [1, 2]})
right = pd.DataFrame({'key': ['foo', 'bar'], 'rval': [4, 5]})
left

Unnamed: 0,key,lval
0,foo,1
1,bar,2


In [68]:
right

Unnamed: 0,key,rval
0,foo,4
1,bar,5


In [69]:
pd.merge(left, right, on='key')

Unnamed: 0,key,lval,rval
0,foo,1,4
1,bar,2,5


In [70]:
df = pd.DataFrame(np.random.randn(8, 4), columns=['A', 'B', 'C', 'D'])
df

Unnamed: 0,A,B,C,D
0,0.074221,-0.139566,-1.0513,0.463759
1,-0.009641,0.058647,-1.596504,-0.568191
2,1.348538,-0.264946,1.989395,0.425886
3,1.108864,-0.924722,1.168957,-0.911804
4,1.180852,1.694254,-0.626953,0.086285
5,1.216657,0.252812,-0.634359,2.003626
6,-0.395313,1.039331,0.860539,0.399722
7,0.090327,-1.258723,-0.645656,-0.150624


In [71]:
# Append a copy of row 3 to the end of the frame, renumbering the index.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, so the
# row (a Series) is turned into a one-row DataFrame and combined with
# pd.concat, which produces the same 9-row result on old and new versions.
s = df.iloc[3]
pd.concat([df, s.to_frame().T], ignore_index=True)

Unnamed: 0,A,B,C,D
0,0.074221,-0.139566,-1.0513,0.463759
1,-0.009641,0.058647,-1.596504,-0.568191
2,1.348538,-0.264946,1.989395,0.425886
3,1.108864,-0.924722,1.168957,-0.911804
4,1.180852,1.694254,-0.626953,0.086285
5,1.216657,0.252812,-0.634359,2.003626
6,-0.395313,1.039331,0.860539,0.399722
7,0.090327,-1.258723,-0.645656,-0.150624
8,1.108864,-0.924722,1.168957,-0.911804


In [72]:
# Sample frame for the grouping examples: two string key columns
# ('A', 'B') and two columns of random floats ('C', 'D').
df = pd.DataFrame({'A': ['foo', 'bar', 'foo', 'bar',
                         'foo', 'bar', 'foo', 'foo'],
                   'B': ['one', 'one', 'two', 'three',
                         'two', 'two', 'one', 'three'],
                   'C': np.random.randn(8),
                   'D': np.random.randn(8)})
df

Unnamed: 0,A,B,C,D
0,foo,one,1.568441,0.155134
1,bar,one,-0.374113,-1.967893
2,foo,two,0.294632,-1.000402
3,bar,three,-0.688598,-0.381287
4,foo,two,-0.001362,0.732068
5,bar,two,2.075172,0.969829
6,foo,one,-0.908007,0.341212
7,foo,three,-1.169838,0.532019


In [73]:
df.groupby('A').sum()

Unnamed: 0_level_0,C,D
A,Unnamed: 1_level_1,Unnamed: 2_level_1
bar,1.012461,-1.379352
foo,-0.216134,0.76003


In [74]:
df.groupby(['A', 'B']).sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,C,D
A,B,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,-0.374113,-1.967893
bar,three,-0.688598,-0.381287
bar,two,2.075172,0.969829
foo,one,0.660434,0.496345
foo,three,-1.169838,0.532019
foo,two,0.29327,-0.268335


In [75]:
# Pair the two label lists element-wise into (first, second) tuples and
# use them as a two-level MultiIndex for a frame of random values.
tuples = list(zip(*[['bar', 'bar', 'baz', 'baz',
                     'foo', 'foo', 'qux', 'qux'],
                    ['one', 'two', 'one', 'two',
                     'one', 'two', 'one', 'two']]))
index = pd.MultiIndex.from_tuples(tuples, names=['first', 'second'])
df = pd.DataFrame(np.random.randn(8, 2), index=index, columns=['A', 'B'])
# Keep only the first four rows (the 'bar' and 'baz' groups) for the
# stack/unstack examples that follow.
df2 = df[:4]
df2

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,-0.591726,0.470522
bar,two,-0.135588,-0.283423
baz,one,0.190774,1.547646
baz,two,-1.877306,-0.78526


In [76]:
stacked = df2.stack()
stacked

first  second   
bar    one     A   -0.591726
               B    0.470522
       two     A   -0.135588
               B   -0.283423
baz    one     A    0.190774
               B    1.547646
       two     A   -1.877306
               B   -0.785260
dtype: float64

In [77]:
# Three unstack variants in one cell. Only the value of the last expression
# is rendered as the cell output; the first two results are computed and
# discarded.
# With no argument, pandas unstacks the last index level (level=-1 is the
# documented default), i.e. the stacked A/B labels.
stacked.unstack()
# Level 1 is 'second' (index names are ['first', 'second']).
stacked.unstack(1)
# Level 0 is 'first': its values (bar, baz) become the columns, as the
# output below shows.
stacked.unstack(0)

Unnamed: 0_level_0,first,bar,baz
second,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
one,A,-0.591726,0.190774
one,B,0.470522,1.547646
two,A,-0.135588,-1.877306
two,B,-0.283423,-0.78526


In [78]:
# Sample frame for the pivot-table example: three categorical key columns
# built by repeating short label lists to 12 rows, plus two columns of
# random floats.
df = pd.DataFrame({'A': ['one', 'one', 'two', 'three'] * 3,
                   'B': ['A', 'B', 'C'] * 4,
                   'C': ['foo', 'foo', 'foo', 'bar', 'bar', 'bar'] * 2,
                   'D': np.random.randn(12),
                   'E': np.random.randn(12)})
df

Unnamed: 0,A,B,C,D,E
0,one,A,foo,0.072692,0.421226
1,one,B,foo,0.181554,-0.196407
2,two,C,foo,-0.326353,-0.088025
3,three,A,bar,-0.305409,-1.016411
4,one,B,bar,0.946921,-0.992936
5,one,C,bar,0.890484,0.787564
6,two,A,foo,1.022299,-0.439568
7,three,B,foo,1.40384,1.861095
8,one,C,foo,-1.209024,-0.036655
9,one,A,bar,-0.63944,0.354326


In [79]:
pd.pivot_table(df, values='D', index=['A', 'B'], columns=['C'])

Unnamed: 0_level_0,C,bar,foo
A,B,Unnamed: 2_level_1,Unnamed: 3_level_1
one,A,-0.63944,0.072692
one,B,0.946921,0.181554
one,C,0.890484,-1.209024
three,A,-0.305409,
three,B,,1.40384
three,C,-0.527037,
two,A,,1.022299
two,B,-1.330951,
two,C,,-0.326353


In [80]:
rng = pd.date_range('1/1/2012', periods=100, freq='S')
ts = pd.Series(np.random.randint(0, 500, len(rng)), index=rng)
ts.resample('5Min').sum()

2012-01-01    26738
Freq: 5T, dtype: int32

In [81]:
rng = pd.date_range('3/6/2012 00:00', periods=5, freq='D')
ts = pd.Series(np.random.randn(len(rng)), rng)
ts

2012-03-06    0.960049
2012-03-07   -1.872956
2012-03-08   -0.075522
2012-03-09    1.357822
2012-03-10   -0.015725
Freq: D, dtype: float64

In [82]:
ts_utc = ts.tz_localize('UTC')
ts_utc

2012-03-06 00:00:00+00:00    0.960049
2012-03-07 00:00:00+00:00   -1.872956
2012-03-08 00:00:00+00:00   -0.075522
2012-03-09 00:00:00+00:00    1.357822
2012-03-10 00:00:00+00:00   -0.015725
Freq: D, dtype: float64

In [83]:
ts_utc.tz_convert('US/Eastern')

2012-03-05 19:00:00-05:00    0.960049
2012-03-06 19:00:00-05:00   -1.872956
2012-03-07 19:00:00-05:00   -0.075522
2012-03-08 19:00:00-05:00    1.357822
2012-03-09 19:00:00-05:00   -0.015725
Freq: D, dtype: float64

In [84]:
rng = pd.date_range('1/1/2012', periods=5, freq='M')
ts = pd.Series(np.random.randn(len(rng)), index=rng)
ts

2012-01-31   -1.127243
2012-02-29   -0.208890
2012-03-31   -0.612622
2012-04-30   -0.187467
2012-05-31    0.059564
Freq: M, dtype: float64

In [85]:
ps = ts.to_period()
ps

2012-01   -1.127243
2012-02   -0.208890
2012-03   -0.612622
2012-04   -0.187467
2012-05    0.059564
Freq: M, dtype: float64

In [86]:
ps.to_timestamp()

2012-01-01   -1.127243
2012-02-01   -0.208890
2012-03-01   -0.612622
2012-04-01   -0.187467
2012-05-01    0.059564
Freq: MS, dtype: float64

In [87]:
# Quarterly periods from 1990Q1 to 2000Q4 using the 'Q-NOV' quarterly
# frequency, with one random value per quarter.
prng = pd.period_range('1990Q1', '2000Q4', freq='Q-NOV')
ts = pd.Series(np.random.randn(len(prng)), prng)
# Re-label every quarter with an hourly period, step by step:
#   asfreq('M', 'e')  -> the month at the end of the quarter
#   + 1               -> the following month
#   asfreq('H', 's')  -> the first hour of that month
#   + 9               -> nine hours later, i.e. 09:00 on the 1st
# (the first label in the output below is 1990-03-01 09:00).
ts.index = (prng.asfreq('M', 'e') + 1).asfreq('H', 's') + 9
ts.head()

1990-03-01 09:00   -0.701594
1990-06-01 09:00    0.374321
1990-09-01 09:00   -0.069774
1990-12-01 09:00   -0.172427
1991-03-01 09:00    0.407817
Freq: H, dtype: float64