In [2]:
# numpy notes and examples
# ndarray - efficient multidimensionall array, arithmetic operations
# math functions on arrays 
# tools to read/write array data to disk/memory 
# Linear algebra, random nums, Fourier transforms
# C API 

# numpy internally stores data in continguous blocks of memory to speed up numerical computation
# complex computation on entire arrays 



In [3]:
import numpy as np 
my_arr = np.arange(1000000)
my_list = list(range(10000000))

%time for _ in range(10): my_arr2 = my_arr * 2 


Wall time: 15.6 ms


In [4]:
%time for _ in range(10): my_list2 = [x * 2 for x in my_list] 

Wall time: 15.8 s


In [5]:
import numpy as np 

data = np.random.randn(2, 3)

data



array([[-0.45019879,  1.85364491, -0.29117068],
       [ 0.27389652, -2.25284566, -0.4613722 ]])

In [6]:
data * 10 


array([[ -4.50198788,  18.53644908,  -2.9117068 ],
       [  2.73896518, -22.52845661,  -4.61372196]])

In [7]:
data + data

array([[-0.90039758,  3.70728982, -0.58234136],
       [ 0.54779304, -4.50569132, -0.92274439]])

In [8]:
# pg 87

## 4.2, page 105 ##

In [9]:
arr = np.arange(10)
np.sqrt(arr) 

    

array([0.        , 1.        , 1.41421356, 1.73205081, 2.        ,
       2.23606798, 2.44948974, 2.64575131, 2.82842712, 3.        ])

In [10]:
np.exp(arr)


array([1.00000000e+00, 2.71828183e+00, 7.38905610e+00, 2.00855369e+01,
       5.45981500e+01, 1.48413159e+02, 4.03428793e+02, 1.09663316e+03,
       2.98095799e+03, 8.10308393e+03])

In [11]:
x = np.random.randn(8)

In [12]:
y = np.random.randn(8)

In [13]:
y

array([ 1.44123193, -1.65180302,  0.84555411,  0.3312907 , -0.62719699,
       -0.54934117,  0.38882259, -1.1798343 ])

In [14]:
 np.maximum(x, y) 

array([ 1.44123193,  0.37982435,  0.84555411,  0.3312907 , -0.62719699,
        0.54008325,  0.38882259,  0.41637198])

In [15]:
arr = np.random.randn(7) * 5
arr


array([-4.09084947, -7.10674676, -0.42245498,  2.42285042,  0.68659243,
       -1.28921507,  3.20744724])

In [16]:
remainder, whole_part = np.modf(arr)
remainder, whole_part


(array([-0.09084947, -0.10674676, -0.42245498,  0.42285042,  0.68659243,
        -0.28921507,  0.20744724]), array([-4., -7., -0.,  2.,  0., -1.,  3.]))

## 4.3 pg. 108 ##

In [17]:
points = np.arange(-5, 5, 0.01) # 1000 equally spaced points
xs, ys = np.meshgrid(points, points)
ys
xs

array([[-5.  , -4.99, -4.98, ...,  4.97,  4.98,  4.99],
       [-5.  , -4.99, -4.98, ...,  4.97,  4.98,  4.99],
       [-5.  , -4.99, -4.98, ...,  4.97,  4.98,  4.99],
       ...,
       [-5.  , -4.99, -4.98, ...,  4.97,  4.98,  4.99],
       [-5.  , -4.99, -4.98, ...,  4.97,  4.98,  4.99],
       [-5.  , -4.99, -4.98, ...,  4.97,  4.98,  4.99]])

In [18]:
z = np.sqrt(xs ** 2 + ys ** 2)
z

array([[7.07106781, 7.06400028, 7.05693985, ..., 7.04988652, 7.05693985,
        7.06400028],
       [7.06400028, 7.05692568, 7.04985815, ..., 7.04279774, 7.04985815,
        7.05692568],
       [7.05693985, 7.04985815, 7.04278354, ..., 7.03571603, 7.04278354,
        7.04985815],
       ...,
       [7.04988652, 7.04279774, 7.03571603, ..., 7.0286414 , 7.03571603,
        7.04279774],
       [7.05693985, 7.04985815, 7.04278354, ..., 7.03571603, 7.04278354,
        7.04985815],
       [7.06400028, 7.05692568, 7.04985815, ..., 7.04279774, 7.04985815,
        7.05692568]])

In [19]:
import matplotlib.pyplot as plt
plt.imshow(z, cmap=plt.cm.gray); 
plt.colorbar() 

<matplotlib.colorbar.Colorbar at 0x1588343e0b8>

In [21]:
xarr = np.array([1.1, 1.2, 1.3, 1.4, 1.5])
yarr = np.array([2.1, 2.2, 2.3, 2.4, 2.5])
cond = np.array([True, False, True, True, False])
result = [(x if c else y) for x, y, c in zip(xarr, yarr, cond)]
result


[1.1, 2.2, 1.3, 1.4, 2.5]

In [22]:
result = np.where(cond, xarr, yarr)


In [23]:
arr = np.random.randn(4, 4)
arr

array([[-1.91629606, -1.07145977, -0.81636811, -0.37944042],
       [-1.46364325,  0.64540394, -0.40393293, -1.06174452],
       [ 0.29790228,  2.36763241,  0.44336083,  0.48495592],
       [-0.34788659,  1.1054284 , -2.30261805, -0.18092105]])

In [24]:
arr > 0

array([[False, False, False, False],
       [False,  True, False, False],
       [ True,  True,  True,  True],
       [False,  True, False, False]])

In [25]:
np.where(arr > 0, 2, -2)

array([[-2, -2, -2, -2],
       [-2,  2, -2, -2],
       [ 2,  2,  2,  2],
       [-2,  2, -2, -2]])

In [26]:
np.where(arr > 0, 2, arr) # set only positive values to 2 

array([[-1.91629606, -1.07145977, -0.81636811, -0.37944042],
       [-1.46364325,  2.        , -0.40393293, -1.06174452],
       [ 2.        ,  2.        ,  2.        ,  2.        ],
       [-0.34788659,  2.        , -2.30261805, -0.18092105]])

In [27]:
arr = np.random.randn(5, 4)


In [28]:
arr.mean()

0.3600895652024273

In [29]:
np.mean(arr)

0.3600895652024273

In [30]:
arr.sum()

7.201791304048546

In [31]:
arr.mean(axis=1) 

array([ 0.08435343,  0.39963233,  0.74896737,  0.70336006, -0.13586536])

In [32]:
arr.sum(axis=0)

array([ 3.41222565,  2.07465556,  2.45838751, -0.7434774 ])

In [33]:
arr = np.array([0, 1, 2, 3, 4, 5, 6, 7])
arr.cumsum()


array([ 0,  1,  3,  6, 10, 15, 21, 28], dtype=int32)

In [34]:
arr = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8]])
arr


array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [35]:
arr.cumsum(axis=0) 

array([[ 0,  1,  2],
       [ 3,  5,  7],
       [ 9, 12, 15]], dtype=int32)

In [36]:
arr.cumprod(axis=1)

array([[  0,   0,   0],
       [  3,  12,  60],
       [  6,  42, 336]], dtype=int32)

In [37]:
arr = np.random.randn(100)
(arr > 0).sum() # Number of positive values 

50

In [38]:
bools = np.array([False, False, True, False])
bools.any()


True

In [39]:
bools.all()

False

In [40]:
arr = np.random.randn(6)
arr.sort()
arr

array([-1.46462741, -1.33112847, -0.74428521, -0.11256033,  0.89341388,
        1.69260781])

In [41]:
arr = np.random.randn(5, 3)
arr.sort(1)
arr

array([[-1.62667021,  0.04512388,  0.74975305],
       [-0.9998988 , -0.20344395,  0.6975952 ],
       [-2.72522077, -0.24564364,  0.82475572],
       [-0.63789467,  0.02389317,  1.04379663],
       [-1.24024515,  0.38122431,  1.03235127]])

In [42]:
large_arr = np.random.randn(1000)
large_arr.sort()
large_arr[int(0.05 * len(large_arr))] # 5% quantile
    

-1.6479180475633766

In [43]:
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
np.unique(names)


array(['Bob', 'Joe', 'Will'], dtype='<U4')

In [44]:
ints = np.array([3, 3, 3, 2, 2, 1, 1, 4, 4])
np.unique(ints)

array([1, 2, 3, 4])

In [45]:
sorted(set(names))

['Bob', 'Joe', 'Will']

In [46]:
values = np.array([6, 0, 0, 3, 2, 5, 6])
np.in1d(values, [2, 3, 6])

array([ True, False, False,  True,  True, False,  True])

## 4.4 File Input and Output with Arrays, pg. 115 ##

In [47]:
arr = np.arange(10)
np.save('some_array', arr)
np.load('some_array.npy')

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [48]:
np.savez('array_archive.npz', a=arr, b=arr)
arch = np.load('array_archive.npz')
arch['b']

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [49]:
np.savez_compressed('arrays_compressed.npz', a=arr, b=arr) 