# Pandas Documentation on Essential Basic Functionality

In this notebook, you will work through the Pandas documentation on DataFrames.

## Imports

In [39]:
import numpy as np
import pandas as pd

## Pandas essential basic functionality

In this notebook, you are going to learn how to use Pandas by typing the code from the Pandas documentation into this notebook.

* Go to the Pandas [Essential Basic Functionality](http://pandas.pydata.org/pandas-docs/stable/basics.html#essential-basic-functionality).
* Type all of the code from that section of the documentation into this notebook and get it working.
* **To learn this API well, you must type the code rather than copy and pasting it**.
* Create a new cell in this section for each `In[]` prompt in the documentation.
* Ignore the cells in the **Grading** section below.
* No Markdown comments are needed.
* Skip the following sub-sections:
  - Tablewise Function Application
  - Applying with a Panel

## Grading

YOUR ANSWER HERE

In [40]:
index = pd.date_range('1/1/2000', periods=8)

In [41]:
s=pd.Series(np.random.randn(5), index=['a','b','c','d','e'])

In [42]:
df=pd.DataFrame(np.random.randn(8,3),index=index,columns=['A','B','C'])

In [43]:
wp=pd.Panel(np.random.randn(2,5,4),items=['Item1','Item2'],major_axis=pd.date_range('1/1/2000',
                                                                                   periods=5),
           minor_axis=['A', 'B','C','D'])

In [44]:
long_series = pd.Series(np.random.randn(1000))

In [45]:
long_series.head()

0   -0.073205
1   -0.114206
2   -0.575932
3   -1.440345
4    0.615606
dtype: float64

In [46]:
long_series.tail(3)

997    1.518488
998   -0.818496
999    0.042606
dtype: float64

In [47]:
df[:2]

Unnamed: 0,A,B,C
2000-01-01,-1.794103,0.799872,-2.169662
2000-01-02,-1.833845,2.27024,3.235942


In [48]:
df.columns = [x.lower() for x in df.columns]

In [49]:
df

Unnamed: 0,a,b,c
2000-01-01,-1.794103,0.799872,-2.169662
2000-01-02,-1.833845,2.27024,3.235942
2000-01-03,0.282423,1.181882,1.135506
2000-01-04,0.360566,2.227198,-0.930466
2000-01-05,1.409662,1.892564,0.759375
2000-01-06,0.005291,-0.126529,0.66589
2000-01-07,-0.345323,2.282855,0.247059
2000-01-08,-0.033642,0.859817,0.965075


In [50]:
s.values

array([ 0.59877157, -0.37731783, -0.90632655, -0.53160763,  0.46716787])

In [51]:
df.values

array([[-1.79410329,  0.79987175, -2.16966226],
       [-1.8338452 ,  2.27024046,  3.23594235],
       [ 0.2824227 ,  1.1818817 ,  1.13550571],
       [ 0.36056604,  2.22719834, -0.93046636],
       [ 1.40966221,  1.89256407,  0.7593753 ],
       [ 0.00529093, -0.12652855,  0.66588968],
       [-0.34532285,  2.28285511,  0.24705881],
       [-0.0336417 ,  0.85981701,  0.96507494]])

In [52]:
wp.values

array([[[-1.10700919, -0.34657347,  0.33253509,  1.27538873],
        [ 1.21057225,  0.42523312,  0.89509033, -0.43907848],
        [-1.45801421, -0.40646294, -1.78377468,  0.84673988],
        [ 0.65614276,  1.38255473,  2.40587165,  1.29251269],
        [-1.28609415,  0.23305843, -0.31720475,  0.70183054]],

       [[-1.52672784,  1.69772386, -1.76970736, -0.08242753],
        [-0.03394475,  1.48323082,  0.32804426,  0.58178328],
        [ 0.68604128, -0.13865969, -0.66083939,  0.01620805],
        [ 1.56304758, -0.71133015, -0.62344293, -0.3289263 ],
        [-0.13578129, -1.65419024, -0.23608944, -0.28778551]]])

In [53]:
df = pd.DataFrame({'one':pd.Series(np.random.randn(3),index=['a','b','c']),
                   'two':pd.Series(np.random.randn(4),index=['a','b','c','d']),
                   'three':pd.Series(np.random.randn(3),index=['b','c','d'])
        
    })

In [54]:
df

Unnamed: 0,one,three,two
a,-0.042018,,-0.099059
b,-0.090694,1.472972,-0.630158
c,0.136756,1.399014,1.279161
d,,-0.029886,-0.467472


In [55]:
row = df.ix[1]

In [56]:
column = df['two']

In [57]:
df.sub(row,axis='columns')

Unnamed: 0,one,three,two
a,0.048676,,0.5311
b,0.0,0.0,0.0
c,0.227451,-0.073959,1.909319
d,,-1.502859,0.162687


In [58]:
df.sub(row,axis=1)

Unnamed: 0,one,three,two
a,0.048676,,0.5311
b,0.0,0.0,0.0
c,0.227451,-0.073959,1.909319
d,,-1.502859,0.162687


In [59]:
df.sub(column,axis='index')

Unnamed: 0,one,three,two
a,0.057041,,0
b,0.539464,2.103131,0
c,-1.142404,0.119853,0
d,,0.437585,0


In [60]:
df.sub(column,axis=0)

Unnamed: 0,one,three,two
a,0.057041,,0
b,0.539464,2.103131,0
c,-1.142404,0.119853,0
d,,0.437585,0


In [61]:
dfmi=df.copy()

In [62]:
dfmi.index=pd.MultiIndex.from_tuples([(1,'a'),(1,'b'),(1,'c'),(2,'a')],
                                    names=['first','second'])

In [63]:
dfmi.sub(columns,axis=0,level='second')

Unnamed: 0_level_0,Unnamed: 1_level_0,one,three,two
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,a,1.14617,,1.089129
1,b,0.223608,1.787275,-0.315856
1,c,0.454407,1.716665,1.596812
2,a,,1.158302,0.720716


In [64]:
major_mean=wp.mean(axis='major')

In [65]:
major_mean

Unnamed: 0,Item1,Item2
A,-0.396881,0.110527
B,0.257562,0.135355
C,0.306504,-0.592407
D,0.735479,-0.02023


In [66]:
wp.sub(major_mean,axis='major')

<class 'pandas.core.panel.Panel'>
Dimensions: 2 (items) x 5 (major_axis) x 4 (minor_axis)
Items axis: Item1 to Item2
Major_axis axis: 2000-01-01 00:00:00 to 2000-01-05 00:00:00
Minor_axis axis: A to D

In [67]:
df

Unnamed: 0,one,three,two
a,-0.042018,,-0.099059
b,-0.090694,1.472972,-0.630158
c,0.136756,1.399014,1.279161
d,,-0.029886,-0.467472


In [69]:
df2 = df.copy()

In [70]:
df2['three']['a'] = 1.

In [71]:
df2

Unnamed: 0,one,three,two
a,-0.042018,1.0,-0.099059
b,-0.090694,1.472972,-0.630158
c,0.136756,1.399014,1.279161
d,,-0.029886,-0.467472


In [72]:
df+df2

Unnamed: 0,one,three,two
a,-0.084036,,-0.198117
b,-0.181389,2.945945,-1.260317
c,0.273513,2.798028,2.558322
d,,-0.059772,-0.934943


In [73]:
df.add(df2,fill_value=0)

Unnamed: 0,one,three,two
a,-0.084036,1.0,-0.198117
b,-0.181389,2.945945,-1.260317
c,0.273513,2.798028,2.558322
d,,-0.059772,-0.934943


In [74]:
df.gt(df2)

Unnamed: 0,one,three,two
a,False,False,False
b,False,False,False
c,False,False,False
d,False,False,False


In [75]:
df.ne(df)

Unnamed: 0,one,three,two
a,False,True,False
b,False,False,False
c,False,False,False
d,True,False,False


In [76]:
(df>0).all()

one      False
three    False
two      False
dtype: bool

In [77]:
(df >0).any()

one      True
three    True
two      True
dtype: bool

In [78]:
(df>0).any().any()

True

In [79]:
df.empty

False

In [80]:
pd.DataFrame(columns=list('ABC')).empty

True

In [81]:
pd.Series([True]).bool()

True

In [82]:
pd.Series([False]).bool()

False

In [83]:
pd.DataFrame([True]).bool()

True

In [84]:
pd.DataFrame([False]).bool()

False

In [85]:
df+df==df*2

Unnamed: 0,one,three,two
a,True,False,True
b,True,True,True
c,True,True,True
d,False,True,True


In [86]:
(df+df==df*2).all()

one      False
three    False
two       True
dtype: bool

In [87]:
np.nan==np.nan

False

In [88]:
(df+df).equals(df*2)

True

In [89]:
df1=pd.DataFrame({'col':['foo',0,'np.nan']})

In [90]:
df2=pd.DataFrame({'col':[np.nan,0,'foo']},index=[2,1,0])

In [91]:
df1.equals(df2)

False

In [92]:
df1.equals(df2.sort_index())

False

In [93]:
pd.Series(['foo','bar','baz']) =='foo'

0     True
1    False
2    False
dtype: bool

In [94]:
pd.Index(['foo','bar','baz']) == 'foo'

array([ True, False, False], dtype=bool)

In [95]:
pd.Series(['foo','bar','baz']) == pd.Index(['foo','bar','qux'])

0     True
1     True
2    False
dtype: bool

In [96]:
pd.Series(['foo','bar','baz'])==np.array(['foo','bar','qux'])

0     True
1     True
2    False
dtype: bool

In [97]:
pd.Series(['foo','bar','baz']) == pd.Series(['foo','bar'])

ValueError: Series lengths must match to compare

In [98]:
pd.Series(['foo','bar','baz']) == pd.Series(['foo'])

ValueError: Series lengths must match to compare

In [100]:
np.array([1,2,3]) == np.array([2])

array([False,  True, False], dtype=bool)

In [101]:
np.array([1,2,3]) == np.array([1,2])

  if __name__ == '__main__':


False

In [102]:
df1=pd.DataFrame({'A': [1,np.nan,3.,5.,np.nan],
                 'B' : [np.nan,2.,3.,np.nan,6.]})

In [103]:
df2=pd.DataFrame({'A':[5.,2.,4.,np.nan,3.,7.],
                 'B' : [np.nan,np.nan,3.,4.,6.,8.]})

In [104]:
df1

Unnamed: 0,A,B
0,1.0,
1,,2.0
2,3.0,3.0
3,5.0,
4,,6.0


In [105]:
df2

Unnamed: 0,A,B
0,5.0,
1,2.0,
2,4.0,3.0
3,,4.0
4,3.0,6.0
5,7.0,8.0


In [106]:
df1.combine_first(df2)

Unnamed: 0,A,B
0,1,
1,2,2.0
2,3,3.0
3,5,4.0
4,3,6.0
5,7,8.0


In [107]:
combiner = lambda x, y:np.where(pd.isnull(x),y,x)

In [108]:
df1.combine(df2,combiner)

Unnamed: 0,A,B
0,1,
1,2,2.0
2,3,3.0
3,5,4.0
4,3,6.0
5,7,8.0


In [109]:
df

Unnamed: 0,one,three,two
a,-0.042018,,-0.099059
b,-0.090694,1.472972,-0.630158
c,0.136756,1.399014,1.279161
d,,-0.029886,-0.467472


In [110]:
df.mean(0)

one      0.001348
three    0.947367
two      0.020618
dtype: float64

In [111]:
df.mean(1)

a   -0.070538
b    0.250707
c    0.938310
d   -0.248679
dtype: float64

In [112]:
df.sum(0,skipna=False)

one           NaN
three         NaN
two      0.082472
dtype: float64

In [113]:
df.sum(axis=1,skipna=True)

a   -0.141077
b    0.752120
c    2.814931
d   -0.497358
dtype: float64

In [114]:
ts_stand = (df-df.mean())/df.std()

In [115]:
ts_stand.std()

one      1
three    1
two      1
dtype: float64

In [117]:
xs_stand=df.sub(df.mean(1),axis=0).div(df.std(1),axis=0)

In [118]:
xs_stand.std(1)

a    1
b    1
c    1
d    1
dtype: float64

In [119]:
df.cumsum()

Unnamed: 0,one,three,two
a,-0.042018,,-0.099059
b,-0.132712,1.472972,-0.729217
c,0.004044,2.871986,0.549944
d,,2.8421,0.082472


In [120]:
np.mean(df['one'])

0.0013480518599562896

In [121]:
np.mean(df['one'].values)

nan

In [122]:
series = pd.Series(np.random.randn(500))

In [123]:
series[20:500] = np.nan

In [124]:
series[10:20] = 5

In [125]:
series.nunique()

11

In [126]:
series=pd.Series(np.random.randn(1000))

In [127]:
series[::2] = np.nan

In [128]:
series.describe()

count    500.000000
mean       0.054544
std        0.927100
min       -2.992268
25%       -0.532561
50%        0.116138
75%        0.727228
max        3.344839
dtype: float64

In [129]:
frame=pd.DataFrame(np.random.randn(100,5),columns=['a','b','c','d','e'])

In [130]:
frame.ix[::2] = np.nan

In [131]:
frame.describe()

Unnamed: 0,a,b,c,d,e
count,50.0,50.0,50.0,50.0,50.0
mean,-0.048925,-0.0471,0.067429,-0.1538,0.186046
std,0.960291,1.036713,1.067017,1.030288,0.9284
min,-1.685197,-2.067082,-2.207051,-2.904869,-1.913641
25%,-0.888718,-0.804493,-0.401861,-0.862672,-0.31184
50%,-0.029162,-0.169125,0.035123,-0.154141,0.157112
75%,0.55605,0.917965,0.789303,0.599471,0.925472
max,2.50557,2.307749,2.59453,2.323921,1.865715


In [132]:
series.describe(percentiles=[.05,.25,.75,.95])

count    500.000000
mean       0.054544
std        0.927100
min       -2.992268
5%        -1.550391
25%       -0.532561
50%        0.116138
75%        0.727228
95%        1.483898
max        3.344839
dtype: float64

In [133]:
s=pd.Series(['a','a','b','b','a','a',np.nan,'c','d','a'])

In [134]:
s.describe()

count     9
unique    4
top       a
freq      5
dtype: object

In [135]:
frame = pd.DataFrame({'a':['Yes','Yes','No','No'],'b': range(4)})

In [139]:
frame.describe()

Unnamed: 0,b
count,4.0
mean,1.5
std,1.290994
min,0.0
25%,0.75
50%,1.5
75%,2.25
max,3.0


In [140]:
frame.describe(include=['object'])

Unnamed: 0,a
count,4
unique,2
top,Yes
freq,2


In [141]:
frame.describe(include=['number'])

Unnamed: 0,b
count,4.0
mean,1.5
std,1.290994
min,0.0
25%,0.75
50%,1.5
75%,2.25
max,3.0


In [142]:
frame.describe(include='all')

Unnamed: 0,a,b
count,4,4.0
unique,2,
top,Yes,
freq,2,
mean,,1.5
std,,1.290994
min,,0.0
25%,,0.75
50%,,1.5
75%,,2.25


In [143]:
s1 = pd.Series(np.random.randn(5))

In [144]:
s1

0    0.700367
1    0.454306
2    1.068132
3    0.557516
4   -0.000666
dtype: float64

In [145]:
s1.idxmin(),s1.idxmax()

(4, 2)

In [146]:
df1=pd.DataFrame(np.random.randn(5,3),columns=['A','B','C'])

In [147]:
df1

Unnamed: 0,A,B,C
0,0.040453,0.637649,0.320908
1,1.191611,-0.44078,1.763521
2,1.974356,1.73874,1.63557
3,0.113916,0.482608,-0.678533
4,-1.008203,-0.302922,0.823078


In [148]:
df1.idxmin(axis=0)

A    4
B    1
C    3
dtype: int64

In [149]:
df1.idxmax(axis=1)

0    B
1    C
2    A
3    B
4    C
dtype: object

In [150]:
df3 = pd.DataFrame([2,1,1,3,np.nan],columns=['A'],index=list('edcba'))

In [151]:
df3

Unnamed: 0,A
e,2.0
d,1.0
c,1.0
b,3.0
a,


In [152]:
df3['A'].idxmin()

'd'

In [153]:
data = np.random.randint(0,7,size=50)

In [154]:
data

array([1, 1, 1, 2, 2, 1, 5, 5, 0, 6, 0, 3, 6, 2, 1, 6, 1, 4, 1, 5, 1, 2, 6,
       4, 5, 6, 4, 5, 1, 1, 2, 0, 1, 0, 4, 1, 0, 0, 2, 6, 3, 5, 1, 5, 2, 4,
       1, 6, 6, 1])

In [155]:
s = pd.Series(data)

In [156]:
s.value_counts()

1    15
6     8
5     7
2     7
0     6
4     5
3     2
dtype: int64

In [157]:
pd.value_counts(data)

1    15
6     8
5     7
2     7
0     6
4     5
3     2
dtype: int64

In [158]:
s5=pd.Series([1,1,3,3,3,5,5,7,7,7])

In [159]:
s5.mode()

0    3
1    7
dtype: int64

In [160]:
df5=pd.DataFrame({"A":np.random.randint(0,7,size=50),
                 "B" :np.random.randint(-10,15,size=50)})

In [161]:
df5.mode()

Unnamed: 0,A,B
0,4,12.0
1,5,


In [162]:
arr = np.random.randn(20)

In [163]:
factor = pd.cut(arr,4)

In [164]:
factor

[(-1.489, -0.16], (1.169, 2.498], (1.169, 2.498], (-1.489, -0.16], (-0.16, 1.169], ..., (-0.16, 1.169], (-1.489, -0.16], (-1.489, -0.16], (1.169, 2.498], (-0.16, 1.169]]
Length: 20
Categories (4, object): [(-2.823, -1.489] < (-1.489, -0.16] < (-0.16, 1.169] < (1.169, 2.498]]

In [165]:
factor = pd.cut(arr,[-5,-1,0,1,5])

In [166]:
factor

[(-1, 0], (1, 5], (1, 5], (-5, -1], (0, 1], ..., (0, 1], (-1, 0], (-1, 0], (1, 5], (0, 1]]
Length: 20
Categories (4, object): [(-5, -1] < (-1, 0] < (0, 1] < (1, 5]]

In [167]:
arr=np.random.randn(30)

In [168]:
factor=pd.qcut(arr,[0,.25,.5,.75,1])

In [169]:
factor

[(0.287, 2.733], (0.287, 2.733], (-0.111, 0.287], (-0.55, -0.111], (0.287, 2.733], ..., [-1.424, -0.55], [-1.424, -0.55], (-0.55, -0.111], (-0.55, -0.111], (0.287, 2.733]]
Length: 30
Categories (4, object): [[-1.424, -0.55] < (-0.55, -0.111] < (-0.111, 0.287] < (0.287, 2.733]]

In [170]:
pd.value_counts(factor)

(0.287, 2.733]     8
[-1.424, -0.55]    8
(-0.111, 0.287]    7
(-0.55, -0.111]    7
dtype: int64

In [171]:
arr=np.random.randn(20)

In [172]:
factor=pd.cut(arr,[-np.inf,0,np.inf])

In [173]:
factor

[(-inf, 0], (-inf, 0], (-inf, 0], (0, inf], (0, inf], ..., (0, inf], (-inf, 0], (0, inf], (-inf, 0], (-inf, 0]]
Length: 20
Categories (2, object): [(-inf, 0] < (0, inf]]

In [174]:
df.apply(np.mean)

one      0.001348
three    0.947367
two      0.020618
dtype: float64

In [175]:
df.apply(np.mean,axis=1)

a   -0.070538
b    0.250707
c    0.938310
d   -0.248679
dtype: float64

In [176]:
df.apply(lambda x: x.max()-x.min())

one      0.227451
three    1.502859
two      1.909319
dtype: float64

In [177]:
df.apply(np.cumsum)

Unnamed: 0,one,three,two
a,-0.042018,,-0.099059
b,-0.132712,1.472972,-0.729217
c,0.004044,2.871986,0.549944
d,,2.8421,0.082472


In [178]:
df.apply(np.exp)

Unnamed: 0,one,three,two
a,0.958853,,0.90569
b,0.913297,4.362182,0.532507
c,1.146549,4.051203,3.593623
d,,0.970556,0.626585


In [196]:
tsdf = pd.DataFrame(np.random.randn(1000, 3), columns=['A', 'B', 'C'],
                    index=pd.date_range('1/1/2000', periods=1000))

In [197]:
tsdf.apply(lambda x: x.idxmax())

A   2002-04-20
B   2002-05-21
C   2001-11-27
dtype: datetime64[ns]

In [198]:
def subtract_and_divide(x,sub,divide=1):
    return (x-sub)/divide

In [199]:
df.apply(subtract_and_divide,args=(5,), divide=3)

Unnamed: 0,one,three,two
a,-1.680673,,-1.699686
b,-1.696898,-1.175676,-1.876719
c,-1.621081,-1.200329,-1.24028
d,,-1.676629,-1.822491


In [202]:
tsdf = pd.DataFrame(np.random.randn(10,3),columns=['A','B','C'],
                   index=pd.date_range('1/1/2000',periods=10))

In [203]:
tsdf.values[3:7] = np.nan

In [204]:
tsdf

Unnamed: 0,A,B,C
2000-01-01,-1.038183,0.775048,-0.253848
2000-01-02,-0.400741,0.85752,-0.73821
2000-01-03,0.838685,-1.239548,-0.852525
2000-01-04,,,
2000-01-05,,,
2000-01-06,,,
2000-01-07,,,
2000-01-08,0.059218,0.727447,-1.287799
2000-01-09,-0.211216,0.27721,-0.62851
2000-01-10,0.597574,0.865551,-1.72641


In [205]:
tsdf.apply(pd.Series.interpolate)

Unnamed: 0,A,B,C
2000-01-01,-1.038183,0.775048,-0.253848
2000-01-02,-0.400741,0.85752,-0.73821
2000-01-03,0.838685,-1.239548,-0.852525
2000-01-04,0.682792,-0.846149,-0.93958
2000-01-05,0.526898,-0.45275,-1.026634
2000-01-06,0.371005,-0.059351,-1.113689
2000-01-07,0.215112,0.334048,-1.200744
2000-01-08,0.059218,0.727447,-1.287799
2000-01-09,-0.211216,0.27721,-0.62851
2000-01-10,0.597574,0.865551,-1.72641


In [206]:
df4 = df.copy()

In [207]:
df4

Unnamed: 0,one,three,two
a,-0.042018,,-0.099059
b,-0.090694,1.472972,-0.630158
c,0.136756,1.399014,1.279161
d,,-0.029886,-0.467472


In [208]:
f=lambda x: len(str(x))

In [209]:
df4['one'].map(f)

a    16
b    16
c    14
d     3
Name: one, dtype: int64

In [210]:
df4.apply(f)

one      81
three    83
two      81
dtype: int64

In [211]:
df4.applymap(f)

Unnamed: 0,one,three,two
a,16,3,16
b,16,13,15
c,14,13,13
d,3,16,15


In [212]:
s = pd.Series(['siz','seven','six','seven','six'],
             index=['a','b','c','d','e'])

In [213]:
t = pd.Series({'six':6.,'seven':7.})

In [214]:
s

a      siz
b    seven
c      six
d    seven
e      six
dtype: object

In [215]:
s.map(t)

a   NaN
b     7
c     6
d     7
e     6
dtype: float64

In [216]:
s = pd.Series(np.random.randn(5),index=['a','b','c','d','e'])

In [217]:
s


a    0.463370
b   -0.926404
c    1.901123
d    0.181473
e   -0.580843
dtype: float64

In [218]:
s.reindex(['e','b','f','d'])

e   -0.580843
b   -0.926404
f         NaN
d    0.181473
dtype: float64

In [219]:
df

Unnamed: 0,one,three,two
a,-0.042018,,-0.099059
b,-0.090694,1.472972,-0.630158
c,0.136756,1.399014,1.279161
d,,-0.029886,-0.467472


In [228]:
df.reindex(index=['c','f','b'],columns=['three','two','one'])

Unnamed: 0,three,two,one
c,1.399014,1.279161,0.136756
f,,,
b,1.472972,-0.630158,-0.090694


In [229]:
rs=s.reindex(df.index)

In [230]:
rs

a    0.463370
b   -0.926404
c    1.901123
d    0.181473
dtype: float64

In [231]:
rs.index is df.index

True

In [233]:
df2 = df.reindex(['a','b','c'],columns=['one','two'])

In [234]:
df3 = df2-df2.mean()

In [235]:
df2

Unnamed: 0,one,two
a,-0.042018,-0.099059
b,-0.090694,-0.630158
c,0.136756,1.279161


In [236]:
df3

Unnamed: 0,one,two
a,-0.043366,-0.282373
b,-0.092042,-0.813473
c,0.135408,1.095846


In [237]:
df.reindex_like(df2)

Unnamed: 0,one,two
a,-0.042018,-0.099059
b,-0.090694,-0.630158
c,0.136756,1.279161


In [239]:
s = pd.Series(np.random.randn(5), index=['a', 'b', 'c', 'd', 'e'])

In [240]:
s1 = s[:4]

In [241]:
s2=s[1:]

In [242]:
s1.align(s2)

(a   -2.565398
 b    1.272513
 c   -1.283152
 d    0.695905
 e         NaN
 dtype: float64, a         NaN
 b    1.272513
 c   -1.283152
 d    0.695905
 e    0.968843
 dtype: float64)

In [243]:
s1.align(s2,join='inner')

(b    1.272513
 c   -1.283152
 d    0.695905
 dtype: float64, b    1.272513
 c   -1.283152
 d    0.695905
 dtype: float64)

In [244]:
s1.align(s2,join='left')

(a   -2.565398
 b    1.272513
 c   -1.283152
 d    0.695905
 dtype: float64, a         NaN
 b    1.272513
 c   -1.283152
 d    0.695905
 dtype: float64)

In [245]:
df.align(df2,join='inner')

(        one       two
 a -0.042018 -0.099059
 b -0.090694 -0.630158
 c  0.136756  1.279161,         one       two
 a -0.042018 -0.099059
 b -0.090694 -0.630158
 c  0.136756  1.279161)

In [246]:
df.align(df2,join='inner',axis=0)

(        one     three       two
 a -0.042018       NaN -0.099059
 b -0.090694  1.472972 -0.630158
 c  0.136756  1.399014  1.279161,         one       two
 a -0.042018 -0.099059
 b -0.090694 -0.630158
 c  0.136756  1.279161)

In [247]:
df.align(df2.ix[0],axis=1)

(        one     three       two
 a -0.042018       NaN -0.099059
 b -0.090694  1.472972 -0.630158
 c  0.136756  1.399014  1.279161
 d       NaN -0.029886 -0.467472, one     -0.042018
 three         NaN
 two     -0.099059
 Name: a, dtype: float64)

In [248]:
rng = pd.date_range('1/3/2000',periods=8)

In [249]:
ts=pd.Series(np.random.randn(8), index=rng)

In [250]:
ts2=ts[[0,3,6]]

In [251]:
ts

2000-01-03    0.366048
2000-01-04    2.124372
2000-01-05    0.464580
2000-01-06   -0.382999
2000-01-07   -0.627791
2000-01-08   -1.100246
2000-01-09    1.032198
2000-01-10    0.211724
Freq: D, dtype: float64

In [252]:
ts2

2000-01-03    0.366048
2000-01-06   -0.382999
2000-01-09    1.032198
dtype: float64

In [253]:
ts2.reindex(ts.index)

2000-01-03    0.366048
2000-01-04         NaN
2000-01-05         NaN
2000-01-06   -0.382999
2000-01-07         NaN
2000-01-08         NaN
2000-01-09    1.032198
2000-01-10         NaN
Freq: D, dtype: float64

In [254]:
ts2.reindex(ts.index,method='ffill')

2000-01-03    0.366048
2000-01-04    0.366048
2000-01-05    0.366048
2000-01-06   -0.382999
2000-01-07   -0.382999
2000-01-08   -0.382999
2000-01-09    1.032198
2000-01-10    1.032198
Freq: D, dtype: float64

In [255]:
ts2.reindex(ts.index,method='bfill')

2000-01-03    0.366048
2000-01-04   -0.382999
2000-01-05   -0.382999
2000-01-06   -0.382999
2000-01-07    1.032198
2000-01-08    1.032198
2000-01-09    1.032198
2000-01-10         NaN
Freq: D, dtype: float64

In [256]:
ts2.reindex(ts.index,method='nearest')

2000-01-03    0.366048
2000-01-04    0.366048
2000-01-05   -0.382999
2000-01-06   -0.382999
2000-01-07   -0.382999
2000-01-08    1.032198
2000-01-09    1.032198
2000-01-10    1.032198
Freq: D, dtype: float64

In [257]:
ts2.reindex(ts.index).fillna(method='ffill')

2000-01-03    0.366048
2000-01-04    0.366048
2000-01-05    0.366048
2000-01-06   -0.382999
2000-01-07   -0.382999
2000-01-08   -0.382999
2000-01-09    1.032198
2000-01-10    1.032198
Freq: D, dtype: float64

In [258]:
ts2.reindex(ts.index,method='ffill',limit=1)

2000-01-03    0.366048
2000-01-04    0.366048
2000-01-05         NaN
2000-01-06   -0.382999
2000-01-07   -0.382999
2000-01-08         NaN
2000-01-09    1.032198
2000-01-10    1.032198
Freq: D, dtype: float64

In [259]:
ts2.reindex(ts.index,method='ffill',tolerance='1 day')

2000-01-03    0.366048
2000-01-04    0.366048
2000-01-05         NaN
2000-01-06   -0.382999
2000-01-07   -0.382999
2000-01-08         NaN
2000-01-09    1.032198
2000-01-10    1.032198
Freq: D, dtype: float64

In [260]:
df

Unnamed: 0,one,three,two
a,-0.042018,,-0.099059
b,-0.090694,1.472972,-0.630158
c,0.136756,1.399014,1.279161
d,,-0.029886,-0.467472


In [261]:
df.drop(['a','d'],axis=0)

Unnamed: 0,one,three,two
b,-0.090694,1.472972,-0.630158
c,0.136756,1.399014,1.279161


In [262]:
df.drop(['one'],axis=1)

Unnamed: 0,three,two
a,,-0.099059
b,1.472972,-0.630158
c,1.399014,1.279161
d,-0.029886,-0.467472


In [263]:
df.reindex(df.index.difference(['a','d']))

Unnamed: 0,one,three,two
b,-0.090694,1.472972,-0.630158
c,0.136756,1.399014,1.279161


In [264]:
s

a   -2.565398
b    1.272513
c   -1.283152
d    0.695905
e    0.968843
dtype: float64

In [266]:
s.rename(str.upper)

A   -2.565398
B    1.272513
C   -1.283152
D    0.695905
E    0.968843
dtype: float64

In [269]:
df.rename(columns={'one' : 'foo','two':'bar'},
          index={'a': 'apple','b': 'banana','d': 'durian'})

Unnamed: 0,foo,three,bar
apple,-0.042018,,-0.099059
banana,-0.090694,1.472972,-0.630158
c,0.136756,1.399014,1.279161
durian,,-0.029886,-0.467472


In [270]:
df = pd.DataFrame({'col1' : np.random.randn(3),'col2' : np.random.randn(3)},
                 index=['a','b','c'])

In [271]:
for col in df:
    print(col)

col1
col2


In [272]:
for item, frame in wp.iteritems():
    print(item)
    print(frame)

Item1
                   A         B         C         D
2000-01-01 -1.107009 -0.346573  0.332535  1.275389
2000-01-02  1.210572  0.425233  0.895090 -0.439078
2000-01-03 -1.458014 -0.406463 -1.783775  0.846740
2000-01-04  0.656143  1.382555  2.405872  1.292513
2000-01-05 -1.286094  0.233058 -0.317205  0.701831
Item2
                   A         B         C         D
2000-01-01 -1.526728  1.697724 -1.769707 -0.082428
2000-01-02 -0.033945  1.483231  0.328044  0.581783
2000-01-03  0.686041 -0.138660 -0.660839  0.016208
2000-01-04  1.563048 -0.711330 -0.623443 -0.328926
2000-01-05 -0.135781 -1.654190 -0.236089 -0.287786


In [273]:
for row_index, row in df.iterrows():
    print('%s\n%s' % (row_index,row))

a
col1    0.712509
col2    1.094860
Name: a, dtype: float64
b
col1   -1.139924
col2    1.898140
Name: b, dtype: float64
c
col1    0.750319
col2    0.721183
Name: c, dtype: float64


In [274]:
df_orig=pd.DataFrame([[1,1.5]],columns=['int','float'])

In [275]:
df_orig.dtypes

int        int64
float    float64
dtype: object

In [276]:
row = next(df_orig.iterrows())[1]

In [277]:
row

int      1.0
float    1.5
Name: 0, dtype: float64

In [278]:
row['int'].dtype

dtype('float64')

In [279]:
df_orig['int'].dtype

dtype('int64')

In [280]:
df2=pd.DataFrame({'x':[1,2,3],'y':[4,5,6]})

In [281]:
print(df2)

   x  y
0  1  4
1  2  5
2  3  6


In [282]:
print(df2.T)

   0  1  2
x  1  2  3
y  4  5  6


In [283]:
df2_t=pd.DataFrame(dict((idx,values) for idx, values in df2.iterrows()))

In [284]:
print(df2_t)

   0  1  2
x  1  2  3
y  4  5  6


In [286]:
for row in df.itertuples():
    print(row)

Pandas(Index='a', col1=0.71250936245192864, col2=1.0948603433347937)
Pandas(Index='b', col1=-1.1399244320676742, col2=1.8981398699462706)
Pandas(Index='c', col1=0.75031880156674291, col2=0.7211831033171614)


In [288]:
s = pd.Series(pd.date_range('20130101 09:10:12', periods=4))


In [289]:
s

0   2013-01-01 09:10:12
1   2013-01-02 09:10:12
2   2013-01-03 09:10:12
3   2013-01-04 09:10:12
dtype: datetime64[ns]

In [290]:
s.dt.hour

0    9
1    9
2    9
3    9
dtype: int64

In [292]:
s.dt.second

0    12
1    12
2    12
3    12
dtype: int64

In [293]:
s.dt.day

0    1
1    2
2    3
3    4
dtype: int64

In [294]:
s[s.dt.day==2]

1   2013-01-02 09:10:12
dtype: datetime64[ns]

In [295]:
stz=s.dt.tz_localize('US/Eastern')

In [296]:
stz

0   2013-01-01 09:10:12-05:00
1   2013-01-02 09:10:12-05:00
2   2013-01-03 09:10:12-05:00
3   2013-01-04 09:10:12-05:00
dtype: datetime64[ns, US/Eastern]

In [297]:
stz.dt.tz

<DstTzInfo 'US/Eastern' LMT-1 day, 19:04:00 STD>

In [298]:
s.dt.tz_localize('UTC').dt.tz_convert('US/Eastern')

0   2013-01-01 04:10:12-05:00
1   2013-01-02 04:10:12-05:00
2   2013-01-03 04:10:12-05:00
3   2013-01-04 04:10:12-05:00
dtype: datetime64[ns, US/Eastern]

In [299]:
pd.Series(pd.date_range('20130101',periods=4))

0   2013-01-01
1   2013-01-02
2   2013-01-03
3   2013-01-04
dtype: datetime64[ns]

In [301]:
s.dt.strftime('%Y/%m/%d')

0    2013/01/01
1    2013/01/02
2    2013/01/03
3    2013/01/04
dtype: object

In [302]:
s = pd.Series(pd.period_range('20130101',periods=4))

In [303]:
s.dt.strftime('%Y/%m/%d')

0    2013/01/01
1    2013/01/02
2    2013/01/03
3    2013/01/04
dtype: object

In [304]:
s = pd.Series(pd.period_range('20130101',periods=4,freq='D'))

In [305]:
s

0   2013-01-01
1   2013-01-02
2   2013-01-03
3   2013-01-04
dtype: object

In [306]:
s.dt.year

0    2013
1    2013
2    2013
3    2013
dtype: int64

In [307]:
s.dt.day

0    1
1    2
2    3
3    4
dtype: int64

In [308]:
s=pd.Series(pd.timedelta_range('1 day 00:00:05',periods=4,freq='s'))

In [309]:
s

0   1 days 00:00:05
1   1 days 00:00:06
2   1 days 00:00:07
3   1 days 00:00:08
dtype: timedelta64[ns]

In [310]:
s.dt.days

0    1
1    1
2    1
3    1
dtype: int64

In [311]:
s.dt.seconds

0    5
1    6
2    7
3    8
dtype: int64

In [312]:
s.dt.components

Unnamed: 0,days,hours,minutes,seconds,milliseconds,microseconds,nanoseconds
0,1,0,0,5,0,0,0
1,1,0,0,6,0,0,0
2,1,0,0,7,0,0,0
3,1,0,0,8,0,0,0


In [313]:
s = pd.Series(['A','B','C','Aaba','Baca', np.nan,'CABA','dog','cat'])

In [314]:
s.str.lower()

0       a
1       b
2       c
3    aaba
4    baca
5     NaN
6    caba
7     dog
8     cat
dtype: object

In [315]:
unsorted_df=df.reindex(index=['a','d','c','b'],
                      columns=['three','two','one'])

In [316]:
unsorted_df.sort_index()

Unnamed: 0,three,two,one
a,,,
b,,,
c,,,
d,,,


In [317]:
unsorted_df.sort_index(ascending=False)

Unnamed: 0,three,two,one
d,,,
c,,,
b,,,
a,,,


In [318]:
unsorted_df.sort_index(axis=1)

Unnamed: 0,one,three,two
a,,,
d,,,
c,,,
b,,,


In [319]:
unsorted_df['three'].sort_index()

a   NaN
b   NaN
c   NaN
d   NaN
Name: three, dtype: float64

In [321]:
df1=pd.DataFrame({'one':[2,1,1,1],'two':[1,3,2,4],'three':[5,4,3,2]})

In [322]:
df1.sort_values(by='two')

Unnamed: 0,one,three,two
0,2,5,1
2,1,3,2
1,1,4,3
3,1,2,4


In [323]:
df1[['one','two','three']].sort_values(by=['one','two'])

Unnamed: 0,one,two,three
2,1,2,3
1,1,3,4
3,1,4,2
0,2,1,5


In [324]:
s[2] = np.nan

In [325]:
s.sort_values()

0       A
3    Aaba
1       B
4    Baca
6    CABA
8     cat
7     dog
2     NaN
5     NaN
dtype: object

In [326]:
s.sort_values(na_position='first')

2     NaN
5     NaN
0       A
3    Aaba
1       B
4    Baca
6    CABA
8     cat
7     dog
dtype: object

In [327]:
ser=pd.Series([1,2,3])

In [328]:
ser.searchsorted([0,3])

array([0, 2])

In [330]:
ser.searchsorted([0,4])

array([0, 3])

In [331]:
ser.searchsorted([1,3],side='right')

array([1, 3])

In [332]:
ser.searchsorted([1,3],side='left')

array([0, 2])

In [333]:
ser = pd.Series([3,1,2])

In [334]:
ser.searchsorted([0,3], sorter=np.argsort(ser))

array([0, 2])

In [335]:
s = pd.Series(np.random.permutation(10))

In [336]:
s

0    7
1    4
2    5
3    2
4    9
5    6
6    1
7    3
8    8
9    0
dtype: int64

In [338]:
s.sort_values()

9    0
6    1
3    2
7    3
1    4
2    5
5    6
0    7
8    8
4    9
dtype: int64

In [339]:
s.nsmallest(3)

9    0
6    1
3    2
dtype: int64

In [341]:
df = pd.DataFrame({'a':[-2,-1,1,10,8,11,-1],
                 'b': list('abdceff'),
                 'c': [1.0,2.0,4.0,3.2,np.nan,3.0,4.0]})

In [342]:
df.nlargest(3,'a')

Unnamed: 0,a,b,c
5,11,f,3.0
3,10,c,3.2
4,8,e,


In [343]:
df.nlargest(5,['a','c'])

Unnamed: 0,a,b,c
5,11,f,3.0
3,10,c,3.2
4,8,e,
2,1,d,4.0
1,-1,b,2.0


In [344]:
df.nsmallest(3,'a')

Unnamed: 0,a,b,c
0,-2,a,1
1,-1,b,2
6,-1,f,4


In [346]:
df.nsmallest(5 ,['a','c'])

Unnamed: 0,a,b,c
0,-2,a,1.0
1,-1,b,2.0
6,-1,f,4.0
2,1,d,4.0
4,8,e,


In [347]:
df1.columns = pd.MultiIndex.from_tuples([('a','one'),('a','two'),('b','three')])

In [348]:
df1.sort_values(by=('a','two'))

Unnamed: 0_level_0,a,a,b
Unnamed: 0_level_1,one,two,three
3,1,2,4
2,1,3,2
1,1,4,3
0,2,5,1


In [351]:
dft = pd.DataFrame(dict(A = np.random.rand(3),
                       B=1,
                       C='foo',
                       D=pd.Timestamp('20010102'),
                       E=pd.Series([1.0]*3).astype('float32'),
                       F=False,
                       G=pd.Series([1]*3,dtype='int8')))

In [352]:
dft

Unnamed: 0,A,B,C,D,E,F,G
0,0.993395,1,foo,2001-01-02,1,False,1
1,0.467072,1,foo,2001-01-02,1,False,1
2,0.224546,1,foo,2001-01-02,1,False,1


In [353]:
dft.dtypes

A           float64
B             int64
C            object
D    datetime64[ns]
E           float32
F              bool
G              int8
dtype: object

In [354]:
dft['A'].dtype

dtype('float64')

In [355]:
pd.Series([1,2,3,4,5,6])

0    1
1    2
2    3
3    4
4    5
5    6
dtype: int64

In [356]:
pd.Series([1,2,3,6.,'foo'])

0      1
1      2
2      3
3      6
4    foo
dtype: object

In [357]:
dft.get_dtype_counts()

bool              1
datetime64[ns]    1
float32           1
float64           1
int64             1
int8              1
object            1
dtype: int64

In [358]:
df1=pd.DataFrame(np.random.randn(8),columns=['A'],dtype='float32')

In [359]:
df1

Unnamed: 0,A
0,0.961968
1,0.650658
2,1.046347
3,0.339642
4,-1.347192
5,0.432548
6,0.974563
7,-0.309191


In [360]:
df1.dtypes

A    float32
dtype: object

In [363]:
df2=pd.DataFrame(dict(A=pd.Series(np.random.randn(8),dtype='float16'),
                     B=pd.Series(np.random.randn(8)),
                     C=pd.Series(np.array(np.random.randn(8),dtype='uint8')) ))

In [364]:
df2

Unnamed: 0,A,B,C
0,-0.962891,0.956336,0
1,-0.088928,-1.581292,1
2,0.13855,-0.987444,1
3,-3.177734,-0.542172,0
4,0.908691,0.565049,0
5,2.3125,-0.310559,1
6,-1.170898,1.495125,255
7,-0.244629,-0.195439,0


In [365]:
pd.DataFrame([1,2],columns=['a']).dtypes

a    int64
dtype: object

In [366]:
pd.DataFrame({'a':[1,2]}).dtypes

a    int64
dtype: object

In [367]:
pd.DataFrame({'a':1}, index=list(range(2))).dtypes

a    int64
dtype: object

In [368]:
frame=pd.DataFrame(np.array([1,2]))

In [369]:
df3=df1.reindex_like(df2).fillna(value=0.0) + df2

In [370]:
df3

Unnamed: 0,A,B,C
0,-0.000922,0.956336,0
1,0.56173,-1.581292,1
2,1.184897,-0.987444,1
3,-2.838092,-0.542172,0
4,-0.438501,0.565049,0
5,2.745048,-0.310559,1
6,-0.196335,1.495125,255
7,-0.55382,-0.195439,0


In [371]:
df3.dtypes

A    float32
B    float64
C    float64
dtype: object

In [373]:
df3.values.dtype

dtype('float64')

In [374]:
df3

Unnamed: 0,A,B,C
0,-0.000922,0.956336,0
1,0.56173,-1.581292,1
2,1.184897,-0.987444,1
3,-2.838092,-0.542172,0
4,-0.438501,0.565049,0
5,2.745048,-0.310559,1
6,-0.196335,1.495125,255
7,-0.55382,-0.195439,0


In [375]:
df3.dtypes

A    float32
B    float64
C    float64
dtype: object

In [376]:
df3.astype('float32').dtypes

A    float32
B    float32
C    float32
dtype: object

In [377]:
df3['D'] = '1.'

In [378]:
df3['E']='1'

In [379]:
df3.convert_objects(convert_numeric=True).dtypes

  if __name__ == '__main__':


A    float32
B    float64
C    float64
D    float64
E      int64
dtype: object

In [380]:
df3['D']=df3['D'].astype('float16')

In [381]:
df3['E'] = df3['E'].astype('int32')

In [382]:
df3.dtypes

A    float32
B    float64
C    float64
D    float16
E      int32
dtype: object

In [383]:
import datetime

In [384]:
s=pd.Series([datetime.datetime(2001,1,1,0,0),
            'foo',1.0,1,pd.Timestamp('20010104'),
            '20010105'],dtype='O')

In [385]:
s

0    2001-01-01 00:00:00
1                    foo
2                      1
3                      1
4    2001-01-04 00:00:00
5               20010105
dtype: object

In [387]:
s.convert_objects(convert_dates='coerce')

  if __name__ == '__main__':


0   2001-01-01
1          NaT
2          NaT
3          NaT
4   2001-01-04
5   2001-01-05
dtype: datetime64[ns]

In [388]:
dfi=df3.astype('int32')

In [389]:
dfi['E']=1

In [390]:
dfi

Unnamed: 0,A,B,C,D,E
0,0,0,0,1,1
1,0,-1,1,1,1
2,1,0,1,1,1
3,-2,0,0,1,1
4,0,0,0,1,1
5,2,0,1,1,1
6,0,1,255,1,1
7,0,0,0,1,1


In [393]:
casted = dfi[dfi>0]

In [394]:
casted

Unnamed: 0,A,B,C,D,E
0,,,,1,1
1,,,1.0,1,1
2,1.0,,1.0,1,1
3,,,,1,1
4,,,,1,1
5,2.0,,1.0,1,1
6,,1.0,255.0,1,1
7,,,,1,1


In [395]:
casted.dtypes

A    float64
B    float64
C    float64
D      int32
E      int64
dtype: object

In [396]:
dfa=df3.copy()

In [397]:
dfa['A'] = dfa['A'].astype('float32')

In [398]:
dfa.dtypes

A    float32
B    float64
C    float64
D    float16
E      int32
dtype: object

In [399]:
casted = dfa[df2>0]

In [400]:
casted

Unnamed: 0,A,B,C,D,E
0,,0.956336,,,
1,,,1.0,,
2,1.184897,,1.0,,
3,,,,,
4,-0.438501,0.565049,,,
5,2.745048,,1.0,,
6,,1.495125,255.0,,
7,,,,,


In [401]:
casted.dtypes

A    float32
B    float64
C    float64
D    float16
E    float64
dtype: object

In [402]:
df = pd.DataFrame({'string': list('abc'),
                   'int64': list(range(1,4)),
                   'uint8':np.arange(3,6).astype('u1'),
                   'float64':np.arange(4.0,7.0),
                   'bool1':[True,False,True],
                   'bool2':[False,True,False],
                   'dates':pd.date_range('now',periods=3).values,
                   'category':pd.Series(list("ABC")).astype('category')
        
    })

In [403]:
df['tdeltas'] = df.dates.diff()

In [404]:
df['uint64'] = np.arange(3,6).astype('u8')

In [405]:
df['other_dates'] = pd.date_range('20130101',periods=3).values

In [406]:
df['tz_aware_dates']=pd.date_range('20130101',periods=3,tz='US/Eastern')

In [407]:
df

Unnamed: 0,bool1,bool2,category,dates,float64,int64,string,uint8,tdeltas,uint64,other_dates,tz_aware_dates
0,True,False,A,2016-02-16 14:08:01.752769,4,1,a,3,NaT,3,2013-01-01,2013-01-01 00:00:00-05:00
1,False,True,B,2016-02-17 14:08:01.752769,5,2,b,4,1 days,4,2013-01-02,2013-01-02 00:00:00-05:00
2,True,False,C,2016-02-18 14:08:01.752769,6,3,c,5,1 days,5,2013-01-03,2013-01-03 00:00:00-05:00


In [408]:
df.dtypes

bool1                                   bool
bool2                                   bool
category                            category
dates                         datetime64[ns]
float64                              float64
int64                                  int64
string                                object
uint8                                  uint8
tdeltas                      timedelta64[ns]
uint64                                uint64
other_dates                   datetime64[ns]
tz_aware_dates    datetime64[ns, US/Eastern]
dtype: object

In [409]:
df.select_dtypes(include=[bool])

Unnamed: 0,bool1,bool2
0,True,False
1,False,True
2,True,False


In [410]:
df.select_dtypes(include=['bool'])

Unnamed: 0,bool1,bool2
0,True,False
1,False,True
2,True,False


In [411]:
df.select_dtypes(include=['number','bool'],exclude=['unsignedinteger'])

Unnamed: 0,bool1,bool2,float64,int64,tdeltas
0,True,False,4,1,NaT
1,False,True,5,2,1 days
2,True,False,6,3,1 days


In [412]:
df.select_dtypes(include=['object'])

Unnamed: 0,string
0,a
1,b
2,c


In [420]:
def subdtypes(dtype):
    subs=dtype.__subclasses__()
    if not subs:
        return dtype
    return [dtype,[subdtypes(dt) for dt in subs]]

In [421]:
subdtypes(np.generic)

[numpy.generic,
 [[numpy.number,
   [[numpy.integer,
     [[numpy.signedinteger,
       [numpy.int8,
        numpy.int16,
        numpy.int32,
        numpy.int64,
        numpy.int64,
        numpy.timedelta64]],
      [numpy.unsignedinteger,
       [numpy.uint8,
        numpy.uint16,
        numpy.uint64,
        numpy.uint64,
        numpy.uint32]]]],
    [numpy.inexact,
     [[numpy.floating,
       [numpy.float16, numpy.float32, numpy.float64, numpy.float128]],
      [numpy.complexfloating,
       [numpy.complex64, numpy.complex128, numpy.complex256]]]]]],
  [numpy.flexible,
   [[numpy.character, [numpy.bytes_, numpy.str_]],
    [numpy.void, [numpy.record]]]],
  numpy.bool_,
  numpy.datetime64,
  numpy.object_]]