In [1]:
# NumPy's vectorized operations are much faster than the equivalent pure-Python loop
# (compare this timing with the list-comprehension timing in the next cell)
import numpy as np
my_arr = np.arange(1000000)
my_list = list(range(1000000))
# time ten repetitions of doubling every element with one vectorized expression
%time for _ in range(10): my_arr2 = my_arr * 2

CPU times: user 16.7 ms, sys: 19 ms, total: 35.8 ms
Wall time: 40.8 ms


In [2]:
# the same doubling done with a pure-Python list comprehension over my_list, timed the same way
%time for _ in range(10): my_list2 = [x * 2 for x in my_list]

CPU times: user 621 ms, sys: 264 ms, total: 886 ms
Wall time: 985 ms


In [3]:
# a 2x3 array of random samples to inspect in the following cells
data = np.random.randn(2, 3)
data

array([[ 0.96041078,  0.50773456,  1.027512  ],
       [ 0.69572909, -0.49588943,  1.516583  ]])

In [4]:
# shape: tuple giving the size of each dimension
data.shape

(2, 3)

In [5]:
# dtype: the data type of the array's elements
data.dtype

dtype('float64')

In [6]:
# a list mixing ints and a float is converted to a float array
data1 = [6, 7.5, 8, 0, 1]
arr1 = np.array(data1)
arr1

array([6. , 7.5, 8. , 0. , 1. ])

In [7]:
# a nested sequence of equal-length lists becomes a multidimensional array
data2 = [
    [1, 2, 3, 4],
    [5, 6, 7, 8],
]
arr2 = np.array(data2)
arr2

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

In [8]:
# ndim: the number of dimensions of the array
arr2.ndim

2

In [9]:
# array of zeros; the shape is passed as a tuple
np.zeros((3,6))

array([[0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.]])

In [10]:
# np.empty allocates the array without initializing it, so the displayed values are arbitrary, not zeros
np.empty((2,3,2))

array([[[6.90275293e-310, 1.00537063e-316],
        [0.00000000e+000, 0.00000000e+000],
        [0.00000000e+000, 1.50008929e+248]],

       [[4.50620083e-144, 4.82412328e+228],
        [1.04718130e-142, 1.14428494e+243],
        [1.35617218e+248, 2.78225511e+296]]])

In [11]:
# request the element type explicitly instead of letting NumPy infer it from the integers
arr1 = np.array([1, 2, 3], dtype=np.float64)
arr1

array([1., 2., 3.])

In [12]:
# casting floats to int32 drops the fractional part (truncation toward zero, not rounding)
arr = np.array([3.7, -1.2, -2.6, 0.5, 12.9, 10.1])
arr.astype(np.int32)

array([ 3, -1, -2,  0, 12, 10], dtype=int32)

In [13]:
# byte strings that spell numbers can be converted to floats with astype
# np.bytes_ is used instead of the alias np.string_, which was removed in NumPy 2.0
numeric_strings = np.array(['1.25', '-9.6', '42'], dtype=np.bytes_)
numeric_strings.astype(float)

array([ 1.25, -9.6 , 42.  ])

In [14]:
int_array = np.arange(10)
# use another array's dtype attribute as the conversion target
target_dtype = numeric_strings.dtype
int_array.astype(target_dtype)

array([b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7', b'8', b'9'],
      dtype='|S4')

In [18]:
# a 2x3 float array used to demonstrate element-wise operations
arr = np.array([[1.0, 2.0, 3.0],
                [4.0, 5.0, 6.0]])
arr

array([[1., 2., 3.],
       [4., 5., 6.]])

In [19]:
# arithmetic between equal-shaped arrays is applied element by element
arr * arr

array([[ 1.,  4.,  9.],
       [16., 25., 36.]])

In [20]:
# element-wise subtraction: every entry minus itself is 0
arr - arr

array([[0., 0., 0.],
       [0., 0., 0.]])

In [21]:
# an operation with a scalar is applied to every element of the array
1 / arr

array([[1.        , 0.5       , 0.33333333],
       [0.25      , 0.2       , 0.16666667]])

In [22]:
# raising to the power 0.5 gives the element-wise square root
arr ** 0.5

array([[1.        , 1.41421356, 1.73205081],
       [2.        , 2.23606798, 2.44948974]])

In [23]:
# a second 2x3 float array
arr2 = np.array([[0.0, 4.0, 1.0],
                 [7.0, 2.0, 12.0]])

In [24]:
# comparing equal-shaped arrays yields a boolean array of the same shape
arr2 > arr

array([[False,  True, False],
       [ True, False,  True]])

In [25]:
# the integers 0..9, used below to demonstrate slicing
arr = np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [26]:
# slice selecting the elements at positions 5, 6 and 7
arr[5:8]

array([5, 6, 7])

In [27]:
# assigning a scalar to a slice sets every selected element to that value
arr[5:8] = 12
arr

array([ 0,  1,  2,  3,  4, 12, 12, 12,  8,  9])

In [28]:
# a slice is a view on the original array, not a copy (the next cell writes through it)
arr_slice = arr[5:8]
arr_slice

array([12, 12, 12])

In [29]:
# writing through the slice changes the corresponding elements of arr itself
arr_slice[:] = 64
arr

array([ 0,  1,  2,  3,  4, 64, 64, 64,  8,  9])

In [37]:
# 3x3 integer array used for the 2-D indexing examples
arr2d = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])
arr2d

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [40]:
# first two rows, columns from index 1 onward
arr2d[:2,1:]

array([[2, 3],
       [5, 6]])

In [41]:
# a bare colon takes the whole axis; :1 keeps the first column and the result stays 2-D
arr2d[:,:1]

array([[1],
       [4],
       [7]])

In [39]:
# slicing along the first axis only: the first two rows
arr2d[:2]

array([[1, 2, 3],
       [4, 5, 6]])

In [38]:
# a single index on a 2-D array returns that row as a 1-D array
arr2d[2]

array([7, 8, 9])

In [31]:
# index the row first, then the element within that row
arr2d[0][2]

3

In [32]:
# comma-separated indices select the same element as arr2d[0][2] in one step
arr2d[0,2]

3

In [34]:
# 2x2x3 integer array used for the 3-D indexing examples
arr3d = np.array([
    [[1, 2, 3], [4, 5, 6]],
    [[7, 8, 9], [10, 11, 12]],
])
arr3d

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [35]:
# a single index on a 3-D array returns a 2-D array
arr3d[0]

array([[1, 2, 3],
       [4, 5, 6]])

In [36]:
# two indices on a 3-D array return a 1-D array
arr3d[1,0]

array([7, 8, 9])

In [43]:
# Boolean indexing: seven names and a 7x4 array of random data, one row per name
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
data = np.random.randn(7, 4)
data

array([[ 0.55942338,  0.14791394,  1.04568652,  0.36044895],
       [-0.32777573,  0.14465921,  1.58953729,  1.3212214 ],
       [-0.67241841, -0.44185474, -0.09486738, -1.51380814],
       [ 0.76307828,  1.05515806, -1.27200601, -1.90429895],
       [-0.13654081, -1.62879949,  0.32157339, -0.43128917],
       [ 0.46069493,  1.10311085, -1.13656091, -0.96808705],
       [ 1.3397144 ,  0.48824789,  1.67894567, -0.10048196]])

In [44]:
# a boolean array used as a row index keeps the rows where it is True (here: the 'Bob' rows)
data[names == 'Bob']

array([[ 0.55942338,  0.14791394,  1.04568652,  0.36044895],
       [ 0.76307828,  1.05515806, -1.27200601, -1.90429895]])

In [45]:
# boolean row selection combined with a column slice (columns 2 onward)
data[names == 'Bob', 2:]

array([[ 1.04568652,  0.36044895],
       [-1.27200601, -1.90429895]])

In [47]:
# element-wise inequality: True wherever the name is not 'Bob'
names != 'Bob'

array([False,  True,  True, False,  True,  True,  True])

In [48]:
# ~ negates a boolean array: select every row whose name is not 'Bob'
data[~(names == 'Bob')]

array([[-0.32777573,  0.14465921,  1.58953729,  1.3212214 ],
       [-0.67241841, -0.44185474, -0.09486738, -1.51380814],
       [-0.13654081, -1.62879949,  0.32157339, -0.43128917],
       [ 0.46069493,  1.10311085, -1.13656091, -0.96808705],
       [ 1.3397144 ,  0.48824789,  1.67894567, -0.10048196]])

In [49]:
# combine two boolean conditions with | (element-wise or)
is_bob = names == 'Bob'
is_will = names == 'Will'
mask = is_bob | is_will
mask

array([ True, False,  True,  True,  True, False, False])

In [50]:
# rows selected by the combined boolean mask
data[mask]

array([[ 0.55942338,  0.14791394,  1.04568652,  0.36044895],
       [-0.67241841, -0.44185474, -0.09486738, -1.51380814],
       [ 0.76307828,  1.05515806, -1.27200601, -1.90429895],
       [-0.13654081, -1.62879949,  0.32157339, -0.43128917]])

In [52]:
# assignment through a boolean mask: every negative entry of data is set to 0 in place
data[data < 0] = 0
data

array([[0.55942338, 0.14791394, 1.04568652, 0.36044895],
       [0.        , 0.14465921, 1.58953729, 1.3212214 ],
       [0.        , 0.        , 0.        , 0.        ],
       [0.76307828, 1.05515806, 0.        , 0.        ],
       [0.        , 0.        , 0.32157339, 0.        ],
       [0.46069493, 1.10311085, 0.        , 0.        ],
       [1.3397144 , 0.48824789, 1.67894567, 0.        ]])

In [54]:
# build an 8x4 array whose every row is filled with its own row number
arr = np.empty((8, 4))
for i, row in enumerate(arr):
    row[:] = i  # each row is a view, so this writes into arr
arr

array([[0., 0., 0., 0.],
       [1., 1., 1., 1.],
       [2., 2., 2., 2.],
       [3., 3., 3., 3.],
       [4., 4., 4., 4.],
       [5., 5., 5., 5.],
       [6., 6., 6., 6.],
       [7., 7., 7., 7.]])

In [55]:
# fancy indexing: a list of row indices returns those rows in the order given
arr[[4,3,0,6]]

array([[4., 4., 4., 4.],
       [3., 3., 3., 3.],
       [0., 0., 0., 0.],
       [6., 6., 6., 6.]])

In [56]:
# the integers 0..31 laid out as 8 rows of 4
arr = np.arange(32).reshape(8, 4)
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23],
       [24, 25, 26, 27],
       [28, 29, 30, 31]])

In [57]:
# two index lists are paired up: this selects the elements at (1,0), (5,3), (7,1) and (2,2)
arr[[1,5,7,2],[0,3,1,2]]

array([ 4, 23, 29, 10])

In [59]:
# the integers 0..14 laid out as 3 rows of 5
arr = np.arange(15).reshape(3, 5)
arr

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [60]:
# .T is the transpose: rows and columns are exchanged
arr.T

array([[ 0,  5, 10],
       [ 1,  6, 11],
       [ 2,  7, 12],
       [ 3,  8, 13],
       [ 4,  9, 14]])

In [61]:
# the integers 0..15 as a 3-D array of shape (2, 2, 4)
arr = np.arange(16).reshape(2, 2, 4)
arr

array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7]],

       [[ 8,  9, 10, 11],
        [12, 13, 14, 15]]])

In [62]:
# for n-dimensional arrays transpose takes a tuple giving the new order of the axes;
# (1,0,2) swaps the first two axes and leaves the last one in place
arr.transpose((1,0,2))

array([[[ 0,  1,  2,  3],
        [ 8,  9, 10, 11]],

       [[ 4,  5,  6,  7],
        [12, 13, 14, 15]]])

In [63]:
# swapaxes exchanges the two given axes
arr.swapaxes(1,2)

array([[[ 0,  4],
        [ 1,  5],
        [ 2,  6],
        [ 3,  7]],

       [[ 8, 12],
        [ 9, 13],
        [10, 14],
        [11, 15]]])

In [64]:
# element-wise maximum of two arrays of 8 random samples each
x, y = np.random.randn(8), np.random.randn(8)
np.maximum(x, y)

array([-0.25469021, -1.04825845,  1.53727018, -0.61643451,  0.85694635,
        0.07813328,  1.03206005, -1.29421169])

In [65]:
# seven random samples scaled by 5
arr = 5 * np.random.randn(7)
arr

array([-3.43324411, -4.30765101,  5.0739311 , -7.11080026,  2.14731216,
        1.53117414,  1.10243488])

In [67]:
# np.modf returns two arrays: the fractional parts and the integral parts of the input
modf_parts = np.modf(arr)
remainder, whole_part = modf_parts
remainder

array([-0.43324411, -0.30765101,  0.0739311 , -0.11080026,  0.14731216,
        0.53117414,  0.10243488])

In [68]:
# np.where(cond, x, y): take the value from xarr where cond is True, otherwise from yarr
cond = np.array([True, False, True, True, False])
xarr = np.array([1.1, 1.2, 1.3, 1.4, 1.5])
yarr = np.array([2.1, 2.2, 2.3, 2.4, 2.5])
result = np.where(cond, xarr, yarr)
result

array([1.1, 2.2, 1.3, 1.4, 2.5])

In [69]:
# 4x4 array of random samples for the np.where examples that follow
arr = np.random.randn(4, 4)
arr

array([[-1.29059647,  1.0817667 , -0.16891924, -1.04240113],
       [ 0.04235236, -0.14512705,  0.38192631, -1.01883174],
       [-0.7122872 ,  0.65619831,  0.24293816, -0.32512483],
       [-0.42370445,  1.09868433, -1.40937349,  0.11095686]])

In [70]:
# both choices may be scalars: 2 where the value is positive, -2 elsewhere
np.where(arr > 0, 2, -2)

array([[-2,  2, -2, -2],
       [ 2, -2,  2, -2],
       [-2,  2,  2, -2],
       [-2,  2, -2,  2]])

In [71]:
# a scalar and an array can be mixed: positive values become 2, all others keep their value
np.where(arr > 0, 2, arr)

array([[-1.29059647,  2.        , -0.16891924, -1.04240113],
       [ 2.        , -0.14512705,  2.        , -1.01883174],
       [-0.7122872 ,  2.        ,  2.        , -0.32512483],
       [-0.42370445,  2.        , -1.40937349,  2.        ]])

In [72]:
# statistical methods: the mean over axis 1 gives one value per row of the 5x4 array
arr = np.random.randn(5, 4)
arr.mean(axis=1)

array([-0.30397576,  0.63984279, -0.15205523, -0.60871463, -0.10476833])

In [73]:
# running total of the integers 0..7
arr = np.arange(8)
arr.cumsum()

array([ 0,  1,  3,  6, 10, 15, 21, 28])

In [75]:
# True counts as 1 when summed, so summing a boolean array counts its True entries
arr = np.random.randn(100)
is_positive = arr > 0
is_positive.sum()

48

In [76]:
# Boolean array: any() is True when at least one element is True
bools = np.array([False, False, True, False])
bools.any()

True

In [77]:
# all() is True only when every element is True
bools.all()

False

In [78]:
# 5% quantile: sort the samples in place, then read the value 5% of the way through
large_arr = np.random.randn(1000)
large_arr.sort()
quantile_index = int(0.05 * len(large_arr))
large_arr[quantile_index]

-1.6082220803255596

In [80]:
# np.unique returns the distinct values in sorted order
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
np.unique(names)

array(['Bob', 'Joe', 'Will'], dtype='<U4')

In [81]:
# pseudorandom number generation: a 4x4 array of samples from np.random.normal
samples = np.random.normal(size=(4, 4))
samples

array([[-0.14094466, -1.5825346 ,  1.31518424, -1.0386097 ],
       [ 1.5239994 , -0.42103041, -0.29633005,  0.42164143],
       [ 0.63537963, -0.81072242, -0.74981991,  0.51166083],
       [-0.29746672, -0.20623688,  0.29709936, -0.81716889]])

In [82]:
# seed NumPy's global random number generator so the np.random calls that follow are reproducible
np.random.seed(1234)

In [84]:
# Example: random walks
# 1. a pure-Python version using the built-in random module
import random
# np.random.seed does not affect the random module, so seed it separately
# to make this walk reproducible on every run
random.seed(1234)
position = 0
walk = [position]
steps = 1000
for _ in range(steps):  # the loop index itself is not needed
    # fair coin flip: randint(0, 1) is 0 or 1, mapped to a step of -1 or +1
    step = 1 if random.randint(0, 1) else -1
    position += step
    walk.append(position)

In [87]:
# 2. vectorized version: generate all the random draws at once;
#    the walk is the cumulative sum of the resulting steps
nsteps = 1000
draws = np.random.randint(0, 2, size=nsteps)  # each draw is 0 or 1
steps = np.where(draws > 0, 1, -1)            # 1 -> +1, 0 -> -1
walk = steps.cumsum()

In [88]:
# first crossing time: index of the first step at which the walk is at least 10 away from the origin
# argmax returns the index of the first occurrence of the maximum, and True is the maximum of a boolean array
# if the walk never gets that far the mask is all False and argmax would misleadingly return 0,
# so the result is None in that case (nothing is displayed)
crossed_10 = np.abs(walk) >= 10
crossed_10.argmax() if crossed_10.any() else None

13

In [89]:
# 3. simulate many random walks at once: one walk per row
nwalks, nsteps = 5000, 1000
draws = np.random.randint(0, 2, size=(nwalks, nsteps))  # 0 or 1
steps = np.where(draws > 0, 1, -1)
walks = steps.cumsum(axis=1)  # accumulate along each row
walks

array([[  1,   2,   3, ..., -26, -27, -28],
       [ -1,  -2,  -3, ..., -12, -13, -12],
       [  1,   0,   1, ...,  46,  45,  44],
       ...,
       [ -1,  -2,  -3, ...,  32,  33,  34],
       [ -1,   0,   1, ...,  -6,  -5,  -4],
       [ -1,  -2,  -1, ..., -18, -19, -18]])

In [91]:
# one flag per walk: does it ever get at least 30 away from the origin?
reached_30 = np.abs(walks) >= 30
hits30 = reached_30.any(axis=1)
hits30

array([ True,  True,  True, ...,  True, False,  True])

In [93]:
# for the walks that do reach a distance of 30, argmax along axis 1 gives the index
# of the first step at which that happens; then average those crossing times
walks_that_hit = walks[hits30]
crossing_times = (np.abs(walks_that_hit) >= 30).argmax(axis=1)
crossing_times.mean()

510.07157707157705