### Merge

In [1]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

In [2]:
dframe1 = DataFrame({'key': ['X', 'Z', 'Y', 'Z', 'X', 'X'], 'data_set_1': np.arange(6)})
dframe1

Unnamed: 0,data_set_1,key
0,0,X
1,1,Z
2,2,Y
3,3,Z
4,4,X
5,5,X


In [4]:
dframe2 = DataFrame({'key': ['Q', 'Y', 'Z'], 'data_set_2': [1, 2, 3]})
dframe2

Unnamed: 0,data_set_2,key
0,1,Q
1,2,Y
2,3,Z


In [5]:
pd.merge(dframe1, dframe2)
# no `on` given: merge joins on the column both frames share ('key') and keeps
# only the keys present in both frames, so X and Q do not appear in the result

Unnamed: 0,data_set_1,key,data_set_2
0,1,Z,3
1,3,Z,3
2,2,Y,2


In [6]:
pd.merge(dframe1, dframe2, on = 'key')
# on = 'key' specifies that key should be used for merging

Unnamed: 0,data_set_1,key,data_set_2
0,1,Z,3
1,3,Z,3
2,2,Y,2


In [8]:
pd.merge(dframe1, dframe2, on = 'key', how = 'left')
# how = 'left' keeps every row of the left frame (dframe1); keys with no match
# in dframe2 (X here) get NaN for data_set_2

Unnamed: 0,data_set_1,key,data_set_2
0,0,X,
1,1,Z,3.0
2,2,Y,2.0
3,3,Z,3.0
4,4,X,
5,5,X,


In [9]:
pd.merge(dframe1, dframe2, on = 'key', how = 'right')
# how = 'right' keeps every key of the right frame (dframe2); Q has no match
# in dframe1, so its data_set_1 is NaN

Unnamed: 0,data_set_1,key,data_set_2
0,1.0,Z,3
1,3.0,Z,3
2,2.0,Y,2
3,,Q,1


In [10]:
pd.merge(dframe1, dframe2, on = 'key', how = 'outer')
# how = 'outer' uses the union of the keys from both frames; a key missing from
# one side gets NaN in that side's columns

Unnamed: 0,data_set_1,key,data_set_2
0,0.0,X,
1,4.0,X,
2,5.0,X,
3,1.0,Z,3.0
4,3.0,Z,3.0
5,2.0,Y,2.0
6,,Q,1.0


In [11]:
dframe3 = DataFrame({'key': ['X', 'X', 'X', 'Y', 'Z', 'Z'], 'data_set_3': range(6)})
dframe3

Unnamed: 0,data_set_3,key
0,0,X
1,1,X
2,2,X
3,3,Y
4,4,Z
5,5,Z


In [12]:
dframe4 = DataFrame({'key': ['Y', 'Y', 'X', 'X', 'Z'], 'data_set_4': range(5)})
dframe4

Unnamed: 0,data_set_4,key
0,0,Y
1,1,Y
2,2,X
3,3,X
4,4,Z


In [13]:
pd.merge(dframe3, dframe4)

Unnamed: 0,data_set_3,key,data_set_4
0,0,X,2
1,0,X,3
2,1,X,2
3,1,X,3
4,2,X,2
5,2,X,3
6,3,Y,0
7,3,Y,1
8,4,Z,4
9,5,Z,4


In [15]:
df_left = DataFrame({'key1': ['SF', 'SF', 'LA'], 'key2': ['one', 'two', 'one'], 
                    'left_data': [10, 20, 30]})
df_left

Unnamed: 0,key1,key2,left_data
0,SF,one,10
1,SF,two,20
2,LA,one,30


In [17]:
df_right = DataFrame({'key1': ['SF', 'SF', 'LA', 'LA'],
                     'key2': ['one', 'one', 'one', 'two'],
                     'right_data': [40, 50, 60, 70]})
df_right

Unnamed: 0,key1,key2,right_data
0,SF,one,40
1,SF,one,50
2,LA,one,60
3,LA,two,70


In [18]:
pd.merge(df_left, df_right, on = ['key1', 'key2'], how = 'outer')

Unnamed: 0,key1,key2,left_data,right_data
0,SF,one,10.0,40.0
1,SF,one,10.0,50.0
2,SF,two,20.0,
3,LA,one,30.0,60.0
4,LA,two,,70.0


In [19]:
pd.merge(df_left, df_right, on = 'key1')
# key2 exists in both frames but is not a join key here, so the result carries
# both copies, told apart by the default suffixes _x (left) and _y (right)

Unnamed: 0,key1,key2_x,left_data,key2_y,right_data
0,SF,one,10,one,40
1,SF,one,10,one,50
2,SF,two,20,one,40
3,SF,two,20,one,50
4,LA,one,30,one,60
5,LA,one,30,two,70


In [20]:
pd.merge(df_left, df_right, on = 'key1', suffixes = ('_lefty', '_righty'))

Unnamed: 0,key1,key2_lefty,left_data,key2_righty,right_data
0,SF,one,10,one,40
1,SF,one,10,one,50
2,SF,two,20,one,40
3,SF,two,20,one,50
4,LA,one,30,one,60
5,LA,one,30,two,70


In [21]:
# reference: pandas documentation page for DataFrame.merge
# (scheme included so the string is a usable absolute URL)
url = 'http://pandas.pydata.org/pandas-docs/dev/generated/pandas.DataFrame.merge.html'

### Merge on Index

In [1]:
import pandas as pd
from pandas import Series, DataFrame
import numpy as np

In [2]:
df_left = DataFrame({'key': ['X', 'Y', 'Z', 'X', 'Y'], 'data': range(5)})
df_left

Unnamed: 0,data,key
0,0,X
1,1,Y
2,2,Z
3,3,X
4,4,Y


In [3]:
df_right = DataFrame({'group_data': [10, 20]}, index = ['X', 'Y'])
df_right

Unnamed: 0,group_data
X,10
Y,20


In [5]:
pd.merge(df_left, df_right, left_on = 'key', right_index = True)
# using the key for the left data and index for the right data to do the merge

Unnamed: 0,data,key,group_data
0,0,X,10
3,3,X,10
1,1,Y,20
4,4,Y,20


In [6]:
df_left_hr = DataFrame({'key1': ['SF', 'SF', 'SF', 'LA', 'LA'],
                       'key2': [10, 20, 30, 20, 30],
                       'data_set': np.arange(5.)})
df_left_hr

Unnamed: 0,data_set,key1,key2
0,0,SF,10
1,1,SF,20
2,2,SF,30
3,3,LA,20
4,4,LA,30


In [8]:
df_right_hr = DataFrame(np.arange(10).reshape(5, 2),
                       index = [['LA', 'LA', 'SF', 'SF', 'SF'],
                               [20, 10, 10, 10, 20]],
                               columns = ['col_1', 'col_2'])
df_right_hr

Unnamed: 0,Unnamed: 1,col_1,col_2
LA,20,0,1
LA,10,2,3
SF,10,4,5
SF,10,6,7
SF,20,8,9


In [9]:
pd.merge(df_left_hr, df_right_hr, left_on = ['key1', 'key2'], right_index = True)

Unnamed: 0,data_set,key1,key2,col_1,col_2
0,0,SF,10,4,5
0,0,SF,10,6,7
1,1,SF,20,8,9
3,3,LA,20,0,1


In [10]:
df_left.join(df_right)
# join() matches on the index by default: df_left has the integer index 0-4 while
# df_right is indexed by 'X'/'Y', so no labels line up and group_data is all NaN.
# Passing on = 'key' would match df_left's 'key' column against df_right's index.

Unnamed: 0,data,key,group_data
0,0,X,
1,1,Y,
2,2,Z,
3,3,X,
4,4,Y,


### Concatenate

In [11]:
arr1 = np.arange(9).reshape(3, 3)
arr1

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [12]:
np.concatenate([arr1, arr1], axis = 1)

array([[0, 1, 2, 0, 1, 2],
       [3, 4, 5, 3, 4, 5],
       [6, 7, 8, 6, 7, 8]])

In [13]:
np.concatenate([arr1, arr1], axis = 0)

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8],
       [0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [14]:
ser1 = Series([0, 1, 2], index = ['T', 'U', 'V'])
ser1

T    0
U    1
V    2
dtype: int64

In [15]:
ser2 = Series([3, 4], index = ['X', 'Y'])
ser2

X    3
Y    4
dtype: int64

In [16]:
pd.concat([ser1, ser2])

T    0
U    1
V    2
X    3
Y    4
dtype: int64

In [17]:
pd.concat([ser1, ser2], axis = 1)

Unnamed: 0,0,1
T,0.0,
U,1.0,
V,2.0,
X,,3.0
Y,,4.0


In [18]:
pd.concat([ser1, ser2], keys = ['cat1', 'cat2'])

cat1  T    0
      U    1
      V    2
cat2  X    3
      Y    4
dtype: int64

In [20]:
dframe1 = DataFrame(np.random.randn(4, 3), columns = ['X', 'Y', 'Z'])
dframe2 = DataFrame(np.random.randn(3, 3), columns = ['Y', 'Q', 'X'])

In [21]:
dframe1

Unnamed: 0,X,Y,Z
0,1.619255,-1.084826,-0.421465
1,1.473194,0.51872,-1.370699
2,1.133518,-0.196041,-1.202154
3,-0.693669,0.005137,-0.884732


In [22]:
dframe2

Unnamed: 0,Y,Q,X
0,-0.496635,-0.031675,0.523042
1,-0.759734,1.761438,-0.629277
2,0.042842,-1.100266,1.554596


In [23]:
pd.concat([dframe1, dframe2])

Unnamed: 0,Q,X,Y,Z
0,,1.619255,-1.084826,-0.421465
1,,1.473194,0.51872,-1.370699
2,,1.133518,-0.196041,-1.202154
3,,-0.693669,0.005137,-0.884732
0,-0.031675,0.523042,-0.496635,
1,1.761438,-0.629277,-0.759734,
2,-1.100266,1.554596,0.042842,


In [24]:
pd.concat([dframe1, dframe2], ignore_index = True)

Unnamed: 0,Q,X,Y,Z
0,,1.619255,-1.084826,-0.421465
1,,1.473194,0.51872,-1.370699
2,,1.133518,-0.196041,-1.202154
3,,-0.693669,0.005137,-0.884732
4,-0.031675,0.523042,-0.496635,
5,1.761438,-0.629277,-0.759734,
6,-1.100266,1.554596,0.042842,


In [26]:
url = 'http://pandas.pydata.org/pandas-docs/dev/generated/pandas.tools.merge.concat.html'

### Combine DataFrames

In [27]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

In [28]:
ser1 = Series([2, np.nan, 4, np.nan, 6, np.nan], index = ['Q', 'R', 'S', 'T', 'U', 'V'])
ser1

Q     2
R   NaN
S     4
T   NaN
U     6
V   NaN
dtype: float64

In [29]:
ser2 = Series(np.arange(len(ser1)), dtype = np.float64, index = ['Q', 'R', 'S', 'T', 'U', 'V'])
ser2

Q    0
R    1
S    2
T    3
U    4
V    5
dtype: float64

In [31]:
# wherever ser1 is null, choose ser2, else, choose ser1
Series(np.where(pd.isnull(ser1), ser2, ser1), index = ser1.index)

Q    2
R    1
S    4
T    3
U    6
V    5
dtype: float64

In [32]:
ser1.combine_first(ser2)
# does the same thing as above

Q    2
R    1
S    4
T    3
U    6
V    5
dtype: float64

In [33]:
nan = np.nan
dframe_odds = DataFrame({'X': [1., nan, 3., nan], 'Y': [nan, 5., nan, 7.],
                        'Z': [nan, 9., nan, 11.]})
dframe_odds

Unnamed: 0,X,Y,Z
0,1.0,,
1,,5.0,9.0
2,3.0,,
3,,7.0,11.0


In [34]:
dframe_evens = DataFrame({'X': [2., 4., nan, 6., 8.], 'Y': [nan, 10., 12., 14., 16.]})
dframe_evens

Unnamed: 0,X,Y
0,2.0,
1,4.0,10.0
2,,12.0
3,6.0,14.0
4,8.0,16.0


In [35]:
dframe_odds.combine_first(dframe_evens)

Unnamed: 0,X,Y,Z
0,1,,
1,4,5.0,9.0
2,3,12.0,
3,6,7.0,11.0
4,8,16.0,


### Reshaping

In [43]:
dframe1 = DataFrame(np.arange(8).reshape(2, 4), index = pd.Index(['LA', 'SF'], name = 'city'),
                   columns = pd.Index(['A', 'B', 'C', 'D'], name = 'letter'))
# building the axes with pd.Index (instead of plain lists) lets you name them:
# here the row index is named 'city' and the column index 'letter'
dframe1

letter,A,B,C,D
city,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
LA,0,1,2,3
SF,4,5,6,7


In [44]:
dframe_st = dframe1.stack()
dframe_st

city  letter
LA    A         0
      B         1
      C         2
      D         3
SF    A         4
      B         5
      C         6
      D         7
dtype: int64

In [45]:
dframe_st.unstack()

letter,A,B,C,D
city,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
LA,0,1,2,3
SF,4,5,6,7


In [46]:
dframe_st.unstack('letter')

letter,A,B,C,D
city,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
LA,0,1,2,3
SF,4,5,6,7


In [47]:
dframe_st.unstack('city')

city,LA,SF
letter,Unnamed: 1_level_1,Unnamed: 2_level_1
A,0,4
B,1,5
C,2,6
D,3,7


In [48]:
ser1 = Series([0, 1, 2], index = ['Q', 'X', 'Y'])
ser1

Q    0
X    1
Y    2
dtype: int64

In [49]:
ser2 = Series([4, 5, 6], index = ['X', 'Y', 'Z'])
ser2

X    4
Y    5
Z    6
dtype: int64

In [58]:
dframe = pd.concat([ser1, ser2], keys = ['Alpha', 'Beta'])
dframe

Alpha  Q    0
       X    1
       Y    2
Beta   X    4
       Y    5
       Z    6
dtype: int64

In [59]:
dframe.unstack()

Unnamed: 0,Q,X,Y,Z
Alpha,0.0,1,2,
Beta,,4,5,6.0


In [60]:
dframe.unstack().stack()

Alpha  Q    0
       X    1
       Y    2
Beta   X    4
       Y    5
       Z    6
dtype: float64

In [61]:
dframe = dframe.unstack()
dframe

Unnamed: 0,Q,X,Y,Z
Alpha,0.0,1,2,
Beta,,4,5,6.0


In [62]:
dframe.stack()

Alpha  Q    0
       X    1
       Y    2
Beta   X    4
       Y    5
       Z    6
dtype: float64

In [63]:
dframe.stack(dropna = False)

Alpha  Q     0
       X     1
       Y     2
       Z   NaN
Beta   Q   NaN
       X     4
       Y     5
       Z     6
dtype: float64

### Pivoting

In [65]:
def unpivot(frame):
    """Reshape a wide frame into long (date, variable, value) records.

    Parameters
    ----------
    frame : DataFrame
        Wide table; its index supplies the 'date' entries and its column
        labels supply the 'variable' entries.

    Returns
    -------
    DataFrame
        One row per cell of `frame`, ordered column by column, with the
        columns ['date', 'variable', 'value'].
    """
    n_rows, n_cols = frame.shape
    long_form = {
        # column-major flattening: all values of the first column, then the second, ...
        'value': frame.values.ravel('F'),
        # each column label repeated once per row, matching the order above
        'variable': np.asarray(frame.columns).repeat(n_rows),
        # the whole index repeated once per column
        'date': np.tile(np.asarray(frame.index), n_cols),
    }

    return DataFrame(long_form, columns = ['date', 'variable', 'value'])

# Build the sample frame explicitly (3 business days x columns A-D of random
# normals) instead of relying on the private pandas.util.testing helpers, which
# are deprecated and not available in recent pandas releases.
dframe = unpivot(DataFrame(np.random.randn(3, 4),
                           index = pd.date_range('2000-01-03', periods = 3, freq = 'B'),
                           columns = list('ABCD')))

In [66]:
dframe

Unnamed: 0,date,variable,value
0,2000-01-03,A,0.667717
1,2000-01-04,A,-2.369827
2,2000-01-05,A,-0.267547
3,2000-01-03,B,-0.477313
4,2000-01-04,B,-0.23253
5,2000-01-05,B,-0.211011
6,2000-01-03,C,1.361268
7,2000-01-04,C,-0.311659
8,2000-01-05,C,-0.866517
9,2000-01-03,D,-0.548036


In [67]:
# keyword arguments: newer pandas releases no longer accept index/columns/values
# positionally in DataFrame.pivot, while the keyword form works on every version
dframe_piv = dframe.pivot(index = 'date', columns = 'variable', values = 'value')
dframe_piv
# index = 'date' supplies the rows, columns = 'variable' supplies the columns,
# and values = 'value' supplies the contents of each cell

variable,A,B,C,D
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2000-01-03,0.667717,-0.477313,1.361268,-0.548036
2000-01-04,-2.369827,-0.23253,-0.311659,0.241135
2000-01-05,-0.267547,-0.211011,-0.866517,0.576828


### Duplicates

In [68]:
dframe = DataFrame({'key1': ['A'] * 2 + ['B'] * 3,
                    'key2': [2, 2, 2, 3, 3]})
dframe

Unnamed: 0,key1,key2
0,A,2
1,A,2
2,B,2
3,B,3
4,B,3


In [69]:
dframe.duplicated()

0    False
1     True
2    False
3    False
4     True
dtype: bool

In [70]:
dframe.drop_duplicates()

Unnamed: 0,key1,key2
0,A,2
2,B,2
3,B,3


In [71]:
dframe.drop_duplicates(['key1'])

Unnamed: 0,key1,key2
0,A,2
2,B,2


### Mapping

In [1]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

In [2]:
dframe = DataFrame({'city': ['Alma', 'Brian Head', 'Fox Park'],
                    'altitude': [3158, 3000, 2762]})
dframe

Unnamed: 0,altitude,city
0,3158,Alma
1,3000,Brian Head
2,2762,Fox Park


In [4]:
state_map = {'Alma': 'Colorado', 'Brian Head': 'Utah', 'Fox Park': 'Wyoming'}

In [6]:
dframe['state'] = dframe['city'].map(state_map)
dframe

Unnamed: 0,altitude,city,state
0,3158,Alma,Colorado
1,3000,Brian Head,Utah
2,2762,Fox Park,Wyoming


### Replace

In [7]:
ser1 = Series([1, 2, 3, 4, 1, 2, 3, 4])
ser1

0    1
1    2
2    3
3    4
4    1
5    2
6    3
7    4
dtype: int64

In [8]:
ser1.replace(1, np.nan)

0   NaN
1     2
2     3
3     4
4   NaN
5     2
6     3
7     4
dtype: float64

In [9]:
ser1.replace([1, 4], [100, 400])
# replaces 1 and 4 with 100 and 400 respectively

0    100
1      2
2      3
3    400
4    100
5      2
6      3
7    400
dtype: int64

In [11]:
ser1.replace({4: np.nan})
# replace 4 with nan

0     1
1     2
2     3
3   NaN
4     1
5     2
6     3
7   NaN
dtype: float64

### Rename Index

In [12]:
dframe = DataFrame(np.arange(12).reshape(3, 4), index = ['NY', 'LA', 'SF'],
                   columns = ['A', 'B', 'C', 'D'])
dframe

Unnamed: 0,A,B,C,D
NY,0,1,2,3
LA,4,5,6,7
SF,8,9,10,11


In [13]:
dframe.index.map(str.lower)

array(['ny', 'la', 'sf'], dtype=object)

In [14]:
dframe.index = dframe.index.map(str.lower)
dframe

Unnamed: 0,A,B,C,D
ny,0,1,2,3
la,4,5,6,7
sf,8,9,10,11


In [15]:
dframe.rename(index = str.title, columns = str.lower)

Unnamed: 0,a,b,c,d
Ny,0,1,2,3
La,4,5,6,7
Sf,8,9,10,11


In [16]:
dframe.rename(index = {'ny': 'NEW YORK'}, columns = {'A': 'ALPHA'})

Unnamed: 0,ALPHA,B,C,D
NEW YORK,0,1,2,3
la,4,5,6,7
sf,8,9,10,11


In [17]:
dframe

Unnamed: 0,A,B,C,D
ny,0,1,2,3
la,4,5,6,7
sf,8,9,10,11


In [19]:
dframe.rename(index = {'ny': 'NEW YORK'}, inplace = True)
# inplace = True modifies dframe itself instead of returning a renamed copy;
# the equivalent dframe = dframe.rename(index = {...}) is usually preferred
dframe

Unnamed: 0,A,B,C,D
NEW YORK,0,1,2,3
la,4,5,6,7
sf,8,9,10,11


### Binning

In [20]:
years = [1990, 1991, 1992, 2008, 2012, 2015, 1987, 1969, 2013, 2008, 1999]

In [27]:
decade_bins = [1960, 1970, 1980, 1990, 2000, 2010, 2020]

In [28]:
decade_cat = pd.cut(years, decade_bins)
decade_cat

[(1980, 1990], (1990, 2000], (1990, 2000], (2000, 2010], (2010, 2020], ..., (1980, 1990], (1960, 1970], (2010, 2020], (2000, 2010], (1990, 2000]]
Length: 11
Categories (6, object): [(1960, 1970] < (1970, 1980] < (1980, 1990] < (1990, 2000] < (2000, 2010] < (2010, 2020]]

In [29]:
decade_cat.categories

Index([u'(1960, 1970]', u'(1970, 1980]', u'(1980, 1990]', u'(1990, 2000]',
       u'(2000, 2010]', u'(2010, 2020]'],
      dtype='object')

In [30]:
# number of years that fall into each decade bin (empty bins are listed with 0);
# the Series method is used because the top-level pd.value_counts is deprecated
pd.Series(decade_cat).value_counts()

(2010, 2020]    3
(1990, 2000]    3
(2000, 2010]    2
(1980, 1990]    2
(1960, 1970]    1
(1970, 1980]    0
dtype: int64

In [31]:
pd.cut(years, 2, precision = 1)

[(1969, 1992], (1969, 1992], (1969, 1992], (1992, 2015], (1992, 2015], ..., (1969, 1992], (1969, 1992], (1992, 2015], (1992, 2015], (1992, 2015]]
Length: 11
Categories (2, object): [(1969, 1992] < (1992, 2015]]

In [33]:
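# (1969, 1992] is interval notation: the round bracket marks an open (excluded)
# left edge and the square bracket marks a closed (included) right edge.
# Read literally, 1969 would be excluded, yet the output above places 1969 in this
# bin: when given a number of bins, pd.cut pads the range slightly below the
# minimum value, and the label shows that edge rounded to the requested precision.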

### Outliers

In [34]:
np.random.seed(12345)

In [35]:
dframe = DataFrame(np.random.randn(1000, 4))

In [36]:
dframe.head()

Unnamed: 0,0,1,2,3
0,-0.204708,0.478943,-0.519439,-0.55573
1,1.965781,1.393406,0.092908,0.281746
2,0.769023,1.246435,1.007189,-1.296221
3,0.274992,0.228913,1.352917,0.886429
4,-2.001637,-0.371843,1.669025,-0.43857


In [37]:
dframe.tail()

Unnamed: 0,0,1,2,3
995,1.089085,0.251232,-1.451985,1.653126
996,-0.478509,-0.010663,-1.060881,-1.50287
997,-1.946267,1.013592,0.037333,0.133304
998,-1.293122,-0.322542,-0.78296,-0.30334
999,0.089987,0.292291,1.177706,0.882755


In [38]:
dframe.describe()

Unnamed: 0,0,1,2,3
count,1000.0,1000.0,1000.0,1000.0
mean,-0.067684,0.067924,0.025598,-0.002298
std,0.998035,0.992106,1.006835,0.996794
min,-3.428254,-3.548824,-3.184377,-3.745356
25%,-0.77489,-0.591841,-0.641675,-0.644144
50%,-0.116401,0.101143,0.002073,-0.013611
75%,0.616366,0.780282,0.680391,0.654328
max,3.366626,2.653656,3.260383,3.927528


In [39]:
col = dframe[0]
col.head()

0   -0.204708
1    1.965781
2    0.769023
3    0.274992
4   -2.001637
Name: 0, dtype: float64

In [40]:
col[np.abs(col) > 3]

523   -3.428254
900    3.366626
Name: 0, dtype: float64

In [43]:
# axis is passed by keyword: newer pandas releases reject a positional axis in .any()
dframe[(np.abs(dframe) > 3).any(axis = 1)]
# returns all rows where at least 1 column has an absolute value > 3

Unnamed: 0,0,1,2,3
5,-0.539741,0.476985,3.248944,-1.021228
97,-0.774363,0.552936,0.106061,3.927528
102,-0.655054,-0.56523,3.176873,0.959533
305,-2.315555,0.457246,-0.025907,-3.399312
324,0.050188,1.951312,3.260383,0.963301
400,0.146326,0.508391,-0.196713,-3.745356
499,-0.293333,-0.242459,-3.05699,1.918403
523,-3.428254,-0.296336,-0.439938,-0.867165
586,0.275144,1.179227,-3.184377,1.369891
808,-0.362528,-3.548824,1.553205,-2.186301


In [44]:
dframe[np.abs(dframe) > 3] = np.sign(dframe) * 3
# caps the outliers by overwriting dframe in place: wherever abs(val) > 3,
# the value becomes -3 or +3, keeping its original sign
# np.sign(-9) = -1, np.sign(8) = 1

In [45]:
dframe.describe()

Unnamed: 0,0,1,2,3
count,1000.0,1000.0,1000.0,1000.0
mean,-0.067623,0.068473,0.025153,-0.002081
std,0.995485,0.990253,1.003977,0.989736
min,-3.0,-3.0,-3.0,-3.0
25%,-0.77489,-0.591841,-0.641675,-0.644144
50%,-0.116401,0.101143,0.002073,-0.013611
75%,0.616366,0.780282,0.680391,0.654328
max,3.0,2.653656,3.0,3.0


### Permutation

In [46]:
dframe = DataFrame(np.arange(16).reshape(4, 4))

In [48]:
blender = np.random.permutation(4)
blender
# create a random permutation of range(4)

array([1, 3, 0, 2])

In [49]:
dframe.take(blender)

Unnamed: 0,0,1,2,3
1,4,5,6,7
3,12,13,14,15
0,0,1,2,3
2,8,9,10,11


In [50]:
box = np.array([1, 2, 3])

In [51]:
shaker = np.random.randint(0, len(box), size = 10)
# sampling with replacement: draw 10 random positions into box, each an
# integer from 0 up to (but not including) len(box)

In [52]:
shaker

array([0, 2, 0, 2, 0, 2, 2, 2, 0, 2])

In [53]:
hand_grabs = box.take(shaker)
hand_grabs

array([1, 3, 1, 3, 1, 3, 3, 3, 1, 3])