# 10 minutes to pandas

In [1]:
# Source: the official pandas user guide, "10 minutes to pandas":
# https://pandas.pydata.org/pandas-docs/stable/user_guide/10min.html

In [3]:
import numpy as np
import pandas as pd

# Object creation

In [5]:
# A Series built from a plain list gets a default integer index; the NaN
# entry makes the values float64.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [6]:
# Six consecutive calendar days starting on 2013-01-01; used below as the
# row index of the example frame.
dates = pd.date_range(start="20130101", periods=6)
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [7]:
# 6x4 frame of standard-normal draws: one row per entry of `dates`, columns
# A-D. The draws are unseeded, so the values differ on every run.
df = pd.DataFrame(
    data=np.random.randn(6, 4),
    index=dates,
    columns=list("ABCD"),
)
df

Unnamed: 0,A,B,C,D
2013-01-01,0.318496,0.219013,0.464563,0.248729
2013-01-02,-1.054389,1.015831,-0.120781,1.166959
2013-01-03,-0.466681,0.519135,-2.776617,-2.047062
2013-01-04,0.154214,0.067054,1.838379,-0.557384
2013-01-05,1.182351,-1.589623,1.106488,0.287534
2013-01-06,-1.181015,-1.137324,-0.416741,-1.083855


In [8]:
# Building a frame from a dict of columns: the scalar entries (A, B, F) are
# repeated for every row, and each column keeps its own dtype.
df2 = pd.DataFrame(
    {
        "A": 1,
        "B": pd.Timestamp("20130102"),
        "C": pd.Series(1, index=list(range(4)), dtype="float32"),
        "D": np.array([3] * 4, dtype="int32"),
        "E": pd.Categorical(["test", "train", "test", "train"]),
        "F": "foo",
    }
)
df2

Unnamed: 0,A,B,C,D,E,F
0,1,2013-01-02,1.0,3,test,foo
1,1,2013-01-02,1.0,3,train,foo
2,1,2013-01-02,1.0,3,test,foo
3,1,2013-01-02,1.0,3,train,foo


In [12]:
df2.dtypes

A             int64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

# Viewing data

In [16]:
df.head()

Unnamed: 0,A,B,C,D
2013-01-01,0.318496,0.219013,0.464563,0.248729
2013-01-02,-1.054389,1.015831,-0.120781,1.166959
2013-01-03,-0.466681,0.519135,-2.776617,-2.047062
2013-01-04,0.154214,0.067054,1.838379,-0.557384
2013-01-05,1.182351,-1.589623,1.106488,0.287534


In [18]:
df.tail(3)

Unnamed: 0,A,B,C,D
2013-01-04,0.154214,0.067054,1.838379,-0.557384
2013-01-05,1.182351,-1.589623,1.106488,0.287534
2013-01-06,-1.181015,-1.137324,-0.416741,-1.083855


In [19]:
df.index

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [20]:
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [21]:
df.to_numpy()

array([[ 0.31849599,  0.2190127 ,  0.4645628 ,  0.24872859],
       [-1.05438927,  1.01583117, -0.12078125,  1.16695937],
       [-0.46668111,  0.51913486, -2.77661673, -2.04706249],
       [ 0.15421378,  0.06705376,  1.8383795 , -0.5573844 ],
       [ 1.18235098, -1.58962286,  1.10648788,  0.28753355],
       [-1.18101509, -1.13732375, -0.41674114, -1.08385461]])

In [22]:
df2.to_numpy()

array([[1, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo'],
       [1, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo']],
      dtype=object)

In [23]:
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,-0.174504,-0.150986,0.015882,-0.330847
std,0.901642,1.003743,1.594749,1.142406
min,-1.181015,-1.589623,-2.776617,-2.047062
25%,-0.907462,-0.836229,-0.342751,-0.952237
50%,-0.156234,0.143033,0.171891,-0.154328
75%,0.277425,0.444104,0.946007,0.277832
max,1.182351,1.015831,1.838379,1.166959


In [24]:
df.T

Unnamed: 0,2013-01-01,2013-01-02,2013-01-03,2013-01-04,2013-01-05,2013-01-06
A,0.318496,-1.054389,-0.466681,0.154214,1.182351,-1.181015
B,0.219013,1.015831,0.519135,0.067054,-1.589623,-1.137324
C,0.464563,-0.120781,-2.776617,1.838379,1.106488,-0.416741
D,0.248729,1.166959,-2.047062,-0.557384,0.287534,-1.083855


In [25]:
df.sort_index(axis=1, ascending=False)

Unnamed: 0,D,C,B,A
2013-01-01,0.248729,0.464563,0.219013,0.318496
2013-01-02,1.166959,-0.120781,1.015831,-1.054389
2013-01-03,-2.047062,-2.776617,0.519135,-0.466681
2013-01-04,-0.557384,1.838379,0.067054,0.154214
2013-01-05,0.287534,1.106488,-1.589623,1.182351
2013-01-06,-1.083855,-0.416741,-1.137324,-1.181015


In [28]:
df.sort_values(by='B')

Unnamed: 0,A,B,C,D
2013-01-05,1.182351,-1.589623,1.106488,0.287534
2013-01-06,-1.181015,-1.137324,-0.416741,-1.083855
2013-01-04,0.154214,0.067054,1.838379,-0.557384
2013-01-01,0.318496,0.219013,0.464563,0.248729
2013-01-03,-0.466681,0.519135,-2.776617,-2.047062
2013-01-02,-1.054389,1.015831,-0.120781,1.166959


# Selection

In [29]:
df['A']

2013-01-01    0.318496
2013-01-02   -1.054389
2013-01-03   -0.466681
2013-01-04    0.154214
2013-01-05    1.182351
2013-01-06   -1.181015
Freq: D, Name: A, dtype: float64

In [30]:
df[0:3]

Unnamed: 0,A,B,C,D
2013-01-01,0.318496,0.219013,0.464563,0.248729
2013-01-02,-1.054389,1.015831,-0.120781,1.166959
2013-01-03,-0.466681,0.519135,-2.776617,-2.047062


In [31]:
df['20130102':'20130104']

Unnamed: 0,A,B,C,D
2013-01-02,-1.054389,1.015831,-0.120781,1.166959
2013-01-03,-0.466681,0.519135,-2.776617,-2.047062
2013-01-04,0.154214,0.067054,1.838379,-0.557384


In [32]:
df.loc[dates[0]]

A    0.318496
B    0.219013
C    0.464563
D    0.248729
Name: 2013-01-01 00:00:00, dtype: float64

In [33]:
df.iloc[0]

A    0.318496
B    0.219013
C    0.464563
D    0.248729
Name: 2013-01-01 00:00:00, dtype: float64

In [34]:
df.loc[:, ['A','B']]

Unnamed: 0,A,B
2013-01-01,0.318496,0.219013
2013-01-02,-1.054389,1.015831
2013-01-03,-0.466681,0.519135
2013-01-04,0.154214,0.067054
2013-01-05,1.182351,-1.589623
2013-01-06,-1.181015,-1.137324


In [35]:
df.loc['20130102':'20130104', ['A','B']]

Unnamed: 0,A,B
2013-01-02,-1.054389,1.015831
2013-01-03,-0.466681,0.519135
2013-01-04,0.154214,0.067054


In [37]:
df.loc['20130102', ['A','B']]

A   -1.054389
B    1.015831
Name: 2013-01-02 00:00:00, dtype: float64

In [38]:
df.loc[dates[0], 'A']

0.3184959934793238

In [39]:
df.at[dates[0], 'A']

0.3184959934793238

# Selection by position

In [40]:
df.iloc[3]

A    0.154214
B    0.067054
C    1.838379
D   -0.557384
Name: 2013-01-04 00:00:00, dtype: float64

In [41]:
df.iloc[3:5, 0:2]

Unnamed: 0,A,B
2013-01-04,0.154214,0.067054
2013-01-05,1.182351,-1.589623


In [43]:
df.iloc[[1,2,4], [0,2]]

Unnamed: 0,A,C
2013-01-02,-1.054389,-0.120781
2013-01-03,-0.466681,-2.776617
2013-01-05,1.182351,1.106488


In [44]:
df.iloc[1:3, :]

Unnamed: 0,A,B,C,D
2013-01-02,-1.054389,1.015831,-0.120781,1.166959
2013-01-03,-0.466681,0.519135,-2.776617,-2.047062


In [45]:
df.iloc[:, 1:3]

Unnamed: 0,B,C
2013-01-01,0.219013,0.464563
2013-01-02,1.015831,-0.120781
2013-01-03,0.519135,-2.776617
2013-01-04,0.067054,1.838379
2013-01-05,-1.589623,1.106488
2013-01-06,-1.137324,-0.416741


In [46]:
df.iloc[1, 1]

1.0158311704849952

In [47]:
df.iat[1, 1]

1.0158311704849952

# Boolean indexing

In [48]:
df[df['A'] > 0]

Unnamed: 0,A,B,C,D
2013-01-01,0.318496,0.219013,0.464563,0.248729
2013-01-04,0.154214,0.067054,1.838379,-0.557384
2013-01-05,1.182351,-1.589623,1.106488,0.287534


In [49]:
df[df > 0]

Unnamed: 0,A,B,C,D
2013-01-01,0.318496,0.219013,0.464563,0.248729
2013-01-02,,1.015831,,1.166959
2013-01-03,,0.519135,,
2013-01-04,0.154214,0.067054,1.838379,
2013-01-05,1.182351,,1.106488,0.287534
2013-01-06,,,,


In [51]:
df2 = df.copy()
df2['E'] = ['one', 'one', 'two', 'three', 'four', 'three']
df2

Unnamed: 0,A,B,C,D,E
2013-01-01,0.318496,0.219013,0.464563,0.248729,one
2013-01-02,-1.054389,1.015831,-0.120781,1.166959,one
2013-01-03,-0.466681,0.519135,-2.776617,-2.047062,two
2013-01-04,0.154214,0.067054,1.838379,-0.557384,three
2013-01-05,1.182351,-1.589623,1.106488,0.287534,four
2013-01-06,-1.181015,-1.137324,-0.416741,-1.083855,three


In [52]:
df2[df2['E'].isin(['two','four'])]

Unnamed: 0,A,B,C,D,E
2013-01-03,-0.466681,0.519135,-2.776617,-2.047062,two
2013-01-05,1.182351,-1.589623,1.106488,0.287534,four


# Setting

In [53]:
# Integer Series indexed by six consecutive days starting 2013-01-01 (the same
# range as `dates`), ready to be attached to `df` as a new column.
s1 = pd.Series(
    data=[1, 2, 3, 4, 5, 6],
    index=pd.date_range("20130101", periods=6),
)
s1

2013-01-01    1
2013-01-02    2
2013-01-03    3
2013-01-04    4
2013-01-05    5
2013-01-06    6
Freq: D, dtype: int64

In [54]:
df['F'] = s1

In [55]:
df.at[dates[0], 'A'] = 0

In [56]:
df.iat[0,1] = 0

In [57]:
df.loc[:, 'D'] = np.array([5] * len(df))

In [58]:
df

Unnamed: 0,A,B,C,D,F
2013-01-01,0.0,0.0,0.464563,5,1
2013-01-02,-1.054389,1.015831,-0.120781,5,2
2013-01-03,-0.466681,0.519135,-2.776617,5,3
2013-01-04,0.154214,0.067054,1.838379,5,4
2013-01-05,1.182351,-1.589623,1.106488,5,5
2013-01-06,-1.181015,-1.137324,-0.416741,5,6


In [59]:
df2 = df.copy()
df2[df2 > 0] = -df2
df2

Unnamed: 0,A,B,C,D,F
2013-01-01,0.0,0.0,-0.464563,-5,-1
2013-01-02,-1.054389,-1.015831,-0.120781,-5,-2
2013-01-03,-0.466681,-0.519135,-2.776617,-5,-3
2013-01-04,-0.154214,-0.067054,-1.838379,-5,-4
2013-01-05,-1.182351,-1.589623,-1.106488,-5,-5
2013-01-06,-1.181015,-1.137324,-0.416741,-5,-6


# Missing data

In [60]:
# Reindex to the first four dates and append a new, initially empty column E,
# then fill E for the first two dates only so the frame contains missing
# values to work with.
df1 = df.reindex(
    index=dates[0:4],
    columns=[*df.columns, "E"],
)
df1.loc[dates[0]:dates[1], "E"] = 1
df1

Unnamed: 0,A,B,C,D,F,E
2013-01-01,0.0,0.0,0.464563,5,1,1.0
2013-01-02,-1.054389,1.015831,-0.120781,5,2,1.0
2013-01-03,-0.466681,0.519135,-2.776617,5,3,
2013-01-04,0.154214,0.067054,1.838379,5,4,


In [61]:
df1.dropna(how='any')

Unnamed: 0,A,B,C,D,F,E
2013-01-01,0.0,0.0,0.464563,5,1,1.0
2013-01-02,-1.054389,1.015831,-0.120781,5,2,1.0


In [63]:
df1.fillna(value=5)

Unnamed: 0,A,B,C,D,F,E
2013-01-01,0.0,0.0,0.464563,5,1,1.0
2013-01-02,-1.054389,1.015831,-0.120781,5,2,1.0
2013-01-03,-0.466681,0.519135,-2.776617,5,3,5.0
2013-01-04,0.154214,0.067054,1.838379,5,4,5.0


In [64]:
pd.isna(df1)

Unnamed: 0,A,B,C,D,F,E
2013-01-01,False,False,False,False,False,False
2013-01-02,False,False,False,False,False,False
2013-01-03,False,False,False,False,False,True
2013-01-04,False,False,False,False,False,True


# Operations

# Stats

In [65]:
df.mean()

A   -0.227587
B   -0.187488
C    0.015882
D    5.000000
F    3.500000
dtype: float64

In [66]:
# Row-wise mean: average across the columns for each date.
df.mean(axis=1)

2013-01-01    1.292913
2013-01-02    1.368132
2013-01-03    1.055167
2013-01-04    2.211929
2013-01-05    2.139843
2013-01-06    1.652984
Freq: D, dtype: float64

In [67]:
s = pd.Series([1,3,5, np.nan, 6,8], index=dates).shift(2)
s

2013-01-01    NaN
2013-01-02    NaN
2013-01-03    1.0
2013-01-04    3.0
2013-01-05    5.0
2013-01-06    NaN
Freq: D, dtype: float64

In [68]:
df.sub(s, axis='index')

Unnamed: 0,A,B,C,D,F
2013-01-01,,,,,
2013-01-02,,,,,
2013-01-03,-1.466681,-0.480865,-3.776617,4.0,2.0
2013-01-04,-2.845786,-2.932946,-1.161621,2.0,1.0
2013-01-05,-3.817649,-6.589623,-3.893512,0.0,0.0
2013-01-06,,,,,


# Apply

In [69]:
df.apply(np.cumsum)

Unnamed: 0,A,B,C,D,F
2013-01-01,0.0,0.0,0.464563,5,1
2013-01-02,-1.054389,1.015831,0.343782,10,3
2013-01-03,-1.52107,1.534966,-2.432835,15,6
2013-01-04,-1.366857,1.60202,-0.594456,20,10
2013-01-05,-0.184506,0.012397,0.512032,25,15
2013-01-06,-1.365521,-1.124927,0.095291,30,21


In [71]:
df.apply(lambda x: x.max() - x.min())

A    2.363366
B    2.605454
C    4.614996
D    0.000000
F    5.000000
dtype: float64

# Histogramming

In [72]:
# Ten random integers in 0..6 (the upper bound 7 is exclusive); unseeded, so
# the values change from run to run.
s = pd.Series(np.random.randint(low=0, high=7, size=10))
s

0    6
1    2
2    4
3    0
4    3
5    2
6    1
7    0
8    4
9    2
dtype: int64

In [73]:
s.value_counts()

2    3
4    2
0    2
6    1
3    1
1    1
dtype: int64

In [76]:
# String methods: the .str accessor applies the operation to every element
# and leaves the missing value as NaN.
s = pd.Series(
    ["A", "B", "C", "Aaba", "Baca", np.nan, "CABA", "dog", "cat"]
)
s.str.lower()

0       a
1       b
2       c
3    aaba
4    baca
5     NaN
6    caba
7     dog
8     cat
dtype: object

# Merge

In [77]:
df = pd.DataFrame(np.random.randn(10,4))
df

Unnamed: 0,0,1,2,3
0,0.702279,-0.262863,0.140262,-0.282625
1,1.17483,-1.096141,-0.486913,0.042369
2,0.290252,-0.463549,-0.592393,0.872333
3,-0.161463,1.011576,0.629472,0.416561
4,-0.56555,-0.663933,-0.718788,-0.213509
5,-1.44711,-0.38685,-1.864836,1.340448
6,0.626913,0.034563,0.153187,0.860431
7,2.014923,-0.210833,-0.308207,0.381133
8,0.539964,-0.342371,3.049113,0.85659
9,-0.783468,-0.165158,0.106762,-1.468627


In [78]:
pieces = [df[:3], df[3:7], df[7:]]
pieces

[          0         1         2         3
 0  0.702279 -0.262863  0.140262 -0.282625
 1  1.174830 -1.096141 -0.486913  0.042369
 2  0.290252 -0.463549 -0.592393  0.872333,
           0         1         2         3
 3 -0.161463  1.011576  0.629472  0.416561
 4 -0.565550 -0.663933 -0.718788 -0.213509
 5 -1.447110 -0.386850 -1.864836  1.340448
 6  0.626913  0.034563  0.153187  0.860431,
           0         1         2         3
 7  2.014923 -0.210833 -0.308207  0.381133
 8  0.539964 -0.342371  3.049113  0.856590
 9 -0.783468 -0.165158  0.106762 -1.468627]

In [79]:
pd.concat(pieces)

Unnamed: 0,0,1,2,3
0,0.702279,-0.262863,0.140262,-0.282625
1,1.17483,-1.096141,-0.486913,0.042369
2,0.290252,-0.463549,-0.592393,0.872333
3,-0.161463,1.011576,0.629472,0.416561
4,-0.56555,-0.663933,-0.718788,-0.213509
5,-1.44711,-0.38685,-1.864836,1.340448
6,0.626913,0.034563,0.153187,0.860431
7,2.014923,-0.210833,-0.308207,0.381133
8,0.539964,-0.342371,3.049113,0.85659
9,-0.783468,-0.165158,0.106762,-1.468627


In [80]:
# Both frames repeat the same key value, so a join on `key` pairs every left
# row with every right row.
left = pd.DataFrame({"key": ["foo"] * 2, "lval": [1, 2]})
right = pd.DataFrame({"key": ["foo"] * 2, "rval": [4, 5]})

In [81]:
left

Unnamed: 0,key,lval
0,foo,1
1,foo,2


In [82]:
right

Unnamed: 0,key,rval
0,foo,4
1,foo,5


In [83]:
pd.merge(left, right, on='key')

Unnamed: 0,key,lval,rval
0,foo,1,4
1,foo,1,5
2,foo,2,4
3,foo,2,5


In [84]:
# Same layout as the earlier left/right pair, but with unique keys, so a join
# on `key` matches rows one-to-one.
left = pd.DataFrame(dict(key=["foo", "bar"], lval=[1, 2]))
right = pd.DataFrame(dict(key=["foo", "bar"], rval=[4, 5]))


In [85]:
left

Unnamed: 0,key,lval
0,foo,1
1,bar,2


In [86]:
right

Unnamed: 0,key,rval
0,foo,4
1,bar,5


In [87]:
pd.merge(left, right, on='key')

Unnamed: 0,key,lval,rval
0,foo,1,4
1,bar,2,5


# Grouping

In [89]:
# Grouping example: two string key columns (A, B) and two columns of unseeded
# standard-normal draws (C, D).
df = pd.DataFrame(
    {
        "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"],
        "B": ["one", "one", "two", "three", "two", "two", "one", "three"],
        "C": np.random.randn(8),
        "D": np.random.randn(8),
    }
)
df

Unnamed: 0,A,B,C,D
0,foo,one,-0.677499,-0.217255
1,bar,one,-0.315398,-0.210256
2,foo,two,-0.927772,-0.145348
3,bar,three,-0.650188,-2.310731
4,foo,two,-1.039139,0.564095
5,bar,two,-0.96054,-1.218539
6,foo,one,0.821982,-1.365133
7,foo,three,-1.30214,-2.156352


In [90]:
# Sum the numeric columns within each group of A. C and D are selected
# explicitly so the string column B stays out of the aggregation: older pandas
# dropped it silently, while pandas 2.0+ would concatenate its strings.
df.groupby('A')[['C', 'D']].sum()

Unnamed: 0_level_0,C,D
A,Unnamed: 1_level_1,Unnamed: 2_level_1
bar,-1.926126,-3.739526
foo,-3.124567,-3.319993


In [91]:
df.groupby(['A','B']).sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,C,D
A,B,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,-0.315398,-0.210256
bar,three,-0.650188,-2.310731
bar,two,-0.96054,-1.218539
foo,one,0.144484,-1.582388
foo,three,-1.30214,-2.156352
foo,two,-1.966911,0.418747


# Reshaping

In [92]:
# Build a two-level row index by pairing each outer label with the inner
# label at the same position.
first_level = ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"]
second_level = ["one", "two"] * 4
tuples = list(zip(first_level, second_level))
index = pd.MultiIndex.from_tuples(tuples, names=["first", "second"])

# Eight rows of unseeded standard-normal draws under that index.
df = pd.DataFrame(np.random.randn(8, 2), index=index, columns=["A", "B"])

# Keep only the first four rows (the bar and baz groups).
df2 = df[:4]
df2

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,-0.447347,-0.760116
bar,two,-2.249788,-1.504774
baz,one,0.523538,-1.301731
baz,two,0.150211,0.596186


In [93]:
stacked = df2.stack()
stacked

first  second   
bar    one     A   -0.447347
               B   -0.760116
       two     A   -2.249788
               B   -1.504774
baz    one     A    0.523538
               B   -1.301731
       two     A    0.150211
               B    0.596186
dtype: float64

In [94]:
stacked.unstack()

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,-0.447347,-0.760116
bar,two,-2.249788,-1.504774
baz,one,0.523538,-1.301731
baz,two,0.150211,0.596186


In [95]:
stacked.unstack(1)

Unnamed: 0_level_0,second,one,two
first,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,A,-0.447347,-2.249788
bar,B,-0.760116,-1.504774
baz,A,0.523538,0.150211
baz,B,-1.301731,0.596186


In [96]:
stacked.unstack(0)

Unnamed: 0_level_0,first,bar,baz
second,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
one,A,-0.447347,0.523538
one,B,-0.760116,-1.301731
two,A,-2.249788,0.150211
two,B,-1.504774,0.596186


In [97]:
# Pivot Tables

In [98]:
# Pivot-table example: twelve rows built by repeating short label patterns
# (A, B, C) alongside two columns of unseeded standard-normal draws (D, E).
df = pd.DataFrame(
    {
        "A": ["one", "one", "two", "three"] * 3,
        "B": ["A", "B", "C"] * 4,
        "C": ["foo", "foo", "foo", "bar", "bar", "bar"] * 2,
        "D": np.random.randn(12),
        "E": np.random.randn(12),
    }
)
df

Unnamed: 0,A,B,C,D,E
0,one,A,foo,-0.001008,1.956445
1,one,B,foo,-0.031484,-0.948605
2,two,C,foo,-0.259619,0.678454
3,three,A,bar,0.531552,0.128503
4,one,B,bar,0.130644,-0.705336
5,one,C,bar,-0.45053,0.820044
6,two,A,foo,-0.533591,1.040841
7,three,B,foo,0.384707,0.917572
8,one,C,foo,-0.89219,0.416339
9,one,A,bar,-0.79846,1.732173


In [101]:
pd.pivot_table(df, values='D', index=['A','B'], columns=['C'])


Unnamed: 0_level_0,C,bar,foo
A,B,Unnamed: 2_level_1,Unnamed: 3_level_1
one,A,-0.79846,-0.001008
one,B,0.130644,-0.031484
one,C,-0.45053,-0.89219
three,A,0.531552,
three,B,,0.384707
three,C,0.245434,
two,A,,-0.533591
two,B,-1.454002,
two,C,,-0.259619


In [102]:
# Time Series

In [103]:
# 100 one-second samples downsampled into 5-minute bins; every sample falls
# into the first bin, so the result has a single row.
# The lowercase aliases 's' and 'min' are used because the uppercase second
# alias 'S' is deprecated since pandas 2.2; the lowercase forms are also
# understood by older releases.
rng = pd.date_range('1/1/2012', periods=100, freq='s')
ts = pd.Series(np.random.randint(0, 500, len(rng)), index=rng)
ts.resample('5min').sum()

2012-01-01    24373
Freq: 5T, dtype: int64

In [104]:
# Five daily timestamps with no time zone attached, carrying unseeded
# standard-normal values.
rng = pd.date_range(start="3/6/2012 00:00", periods=5, freq="D")
ts = pd.Series(data=np.random.randn(len(rng)), index=rng)
ts

2012-03-06   -0.533346
2012-03-07    0.029025
2012-03-08   -0.137224
2012-03-09   -0.048298
2012-03-10   -0.073989
Freq: D, dtype: float64

In [105]:
ts_utc = ts.tz_localize('UTC')
ts_utc

2012-03-06 00:00:00+00:00   -0.533346
2012-03-07 00:00:00+00:00    0.029025
2012-03-08 00:00:00+00:00   -0.137224
2012-03-09 00:00:00+00:00   -0.048298
2012-03-10 00:00:00+00:00   -0.073989
Freq: D, dtype: float64

In [106]:
ts_utc.tz_convert('US/Eastern')

2012-03-05 19:00:00-05:00   -0.533346
2012-03-06 19:00:00-05:00    0.029025
2012-03-07 19:00:00-05:00   -0.137224
2012-03-08 19:00:00-05:00   -0.048298
2012-03-09 19:00:00-05:00   -0.073989
Freq: D, dtype: float64

In [110]:
# Five month-end timestamps starting with 2012-01-31.
# An offset object is used instead of a frequency alias because the alias
# spelling differs across pandas versions ('M' is deprecated in favour of 'ME'
# since 2.2, and 'ME' is not recognised by older releases); the offset object
# works on both.
rng = pd.date_range('1/1/2012', periods=5, freq=pd.offsets.MonthEnd())
ts = pd.Series(np.random.randn(len(rng)), index=rng)
ts

2012-01-31    0.307367
2012-02-29    1.757230
2012-03-31    1.991836
2012-04-30    1.291970
2012-05-31   -0.552277
Freq: M, dtype: float64

In [111]:
ps = ts.to_period()
ps

2012-01    0.307367
2012-02    1.757230
2012-03    1.991836
2012-04    1.291970
2012-05   -0.552277
Freq: M, dtype: float64

In [112]:
ps.to_timestamp()

2012-01-01    0.307367
2012-02-01    1.757230
2012-03-01    1.991836
2012-04-01    1.291970
2012-05-01   -0.552277
Freq: MS, dtype: float64

In [113]:
# Quarterly periods for a fiscal year ending in November, re-labelled as 09:00
# on the first day of the month that follows each quarter end:
#   quarter -> its last month ('M', 'e') -> next month (+1)
#           -> first hour of that month ('h', 's') -> nine hours later (+9).
# The hourly alias is written as lowercase 'h' because the uppercase 'H' is
# deprecated since pandas 2.2; older releases accept the lowercase form too.
prng = pd.period_range('1990Q1', '2000Q4', freq='Q-NOV')
ts = pd.Series(np.random.randn(len(prng)), prng)
ts.index = (prng.asfreq('M', 'e') + 1).asfreq('h', 's') + 9
ts.head()

1990-03-01 09:00   -0.274809
1990-06-01 09:00    0.172362
1990-09-01 09:00    0.082314
1990-12-01 09:00    0.997878
1991-03-01 09:00    1.199996
Freq: H, dtype: float64

In [114]:
# Categoricals: start from raw letter grades stored as plain strings.
df = pd.DataFrame(
    {
        "id": [1, 2, 3, 4, 5, 6],
        "raw_grade": ["a", "b", "b", "a", "a", "e"],
    }
)

In [115]:
df["grade"] = df["raw_grade"].astype("category")
df["grade"]

0    a
1    b
2    b
3    a
4    a
5    e
Name: grade, dtype: category
Categories (3, object): ['a', 'b', 'e']

In [122]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype   
---  ------     --------------  -----   
 0   id         6 non-null      int64   
 1   raw_grade  6 non-null      object  
 2   grade      6 non-null      category
dtypes: category(1), int64(1), object(1)
memory usage: 334.0+ bytes


In [123]:
# Give the categories meaningful names, in the same order as the existing
# categories. rename_categories returns a new Series, so the result is assigned
# back to the column; assigning to `.cat.categories` directly is no longer
# supported in pandas 2.0 and later.
df["grade"] = df["grade"].cat.rename_categories(["very good", "good", "very bad"])

In [124]:
df["grade"] = df["grade"].cat.set_categories(["very bad", "bad", "medium",
                                              "good", "very good"])
df["grade"]

0    very good
1         good
2         good
3    very good
4    very good
5     very bad
Name: grade, dtype: category
Categories (5, object): ['very bad', 'bad', 'medium', 'good', 'very good']

In [125]:
df.sort_values(by="grade")

Unnamed: 0,id,raw_grade,grade
5,6,e,very bad
1,2,b,good
2,3,b,good
0,1,a,very good
3,4,a,very good
4,5,a,very good


In [126]:
# Count rows per grade, including categories that have no rows at all.
# observed=False is passed explicitly: relying on the implicit default is
# deprecated in recent pandas, and with observed=True the empty categories
# ("bad", "medium") would be left out of the result.
df.groupby("grade", observed=False).size()

grade
very bad     1
bad          0
medium       0
good         2
very good    3
dtype: int64

In [None]:
# Plotting