In [1]:
import numpy as np

# 4 NumPy Basics: Arrays and Vectorized Computation

### Performance difference
1 million integers   
NumPy array vs Python list

In [1]:
import numpy as np

In [2]:
# One million consecutive integers (0 .. 999_999) held in a NumPy array
my_arr = np.arange(1_000_000)

In [3]:
# The same one million integers as a plain Python list, for comparison
my_list = [*range(1_000_000)]

In [4]:
# Time 10 repetitions of doubling every element of the NumPy array with one vectorized expression
%time for _ in range(10): my_arr2 = my_arr * 2

CPU times: user 14.8 ms, sys: 11.3 ms, total: 26.1 ms
Wall time: 32.1 ms


In [5]:
# Time the same 10 doublings done element by element with a list comprehension
%time for _ in range(10): my_list2 = [x * 2 for x in my_list]

CPU times: user 586 ms, sys: 164 ms, total: 750 ms
Wall time: 779 ms


#### NumPy-based algorithms are generally 10 to 100x faster than their pure-Python counterparts (here: 32.1 ms vs. 779 ms wall time)

# 4.1: ndarray

**ndarray:** N-dimensional array object
+ fast, flexible container for large datasets in Python

In [6]:
# Draw a 2x3 array of standard-normal samples.
# No seed is set in the visible cells, so the values change on every run.
data = np.random.standard_normal(size=(2, 3))

In [7]:
data

array([[ 0.75586639, -0.23944925, -0.14798485],
       [ 0.3788274 , -1.84200827, -3.19017663]])

In [8]:
data * 10

array([[  7.55866389,  -2.39449255,  -1.47984853],
       [  3.788274  , -18.42008274, -31.9017663 ]])

In [9]:
data + data

array([[ 1.51173278, -0.47889851, -0.29596971],
       [ 0.7576548 , -3.68401655, -6.38035326]])

##### All elements of an ndarray must be of the same type
Every array has:
+ **shape:** tuple indicating size of each dimension
+ **dtype:** object describing data type of array

In [10]:
data.shape

(2, 3)

In [11]:
data.dtype

dtype('float64')

##### Creating ndarrays

In [12]:
# A flat list mixing ints and one float; np.array(data1) below yields a float64 array
data1 = [6, 7.5, 8, 0, 1]

In [13]:
arr1 = np.array(data1)

In [14]:
arr1

array([6. , 7.5, 8. , 0. , 1. ])

##### Nested sequences will be converted into multidimensional arrays

In [15]:
# Two inner lists of equal length: each inner list becomes one row of the resulting array
data2 = [[1, 2, 3, 4], [5, 6, 7, 8]]

In [16]:
arr2 = np.array(data2)

In [17]:
arr2

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

##### ndim = number of dimensions (axes); shape = size along each axis, here (rows, columns)

In [18]:
arr2.ndim

2

In [19]:
arr2.shape

(2, 4)

In [20]:
arr1.dtype

dtype('float64')

In [21]:
arr2.dtype

dtype('int64')

In [23]:
np.zeros(10)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [24]:
np.zeros((3, 6))

array([[0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.]])

In [27]:
# np.empty only allocates memory without initializing it, so the values shown are arbitrary leftovers
np.empty((2, 3, 2))

array([[[1.72723371e-077, 1.72723371e-077],
        [4.94065646e-323, 0.00000000e+000],
        [0.00000000e+000, 0.00000000e+000]],

       [[0.00000000e+000, 0.00000000e+000],
        [0.00000000e+000, 0.00000000e+000],
        [0.00000000e+000, 1.72723371e-077]]])

##### arange() is the array-valued version of Python's built-in range()

In [28]:
np.arange(15)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [29]:
# Set the dtype explicitly instead of letting NumPy infer it (note: rebinds arr1 from the earlier cells)
arr1 = np.array([1, 2, 3], dtype=np.float64)

In [30]:
# Same values stored with an explicit int32 dtype (note: rebinds arr2 from the earlier cells)
arr2 = np.array([1, 2, 3], dtype=np.int32)

In [31]:
arr1.dtype

dtype('float64')

In [32]:
arr2.dtype

dtype('int32')

##### cast an array using astype()

In [33]:
arr = np.array([1, 2, 3, 4, 5])

In [34]:
arr.dtype

dtype('int64')

In [35]:
# astype returns a new array with the requested dtype; arr itself is not modified
float_arr = arr.astype(np.float64)

In [36]:
float_arr.dtype

dtype('float64')

##### float -> integer: the decimal part is truncated toward zero (not rounded)

In [37]:
arr = np.array([3.7, -1.2, -2.6, 0.5, 12.9, 10.1])

In [38]:
arr

array([ 3.7, -1.2, -2.6,  0.5, 12.9, 10.1])

In [39]:
arr.astype(np.int32)

array([ 3, -1, -2,  0, 12, 10], dtype=int32)

##### can convert strings representing numbers

In [40]:
# Byte-string array of numeric text. np.bytes_ is the byte-string scalar type;
# the old alias np.string_ was removed in NumPy 2.0, so it is not used here.
numeric_strings = np.array(['1.25', '-9.6', '42'], dtype=np.bytes_)

In [41]:
numeric_strings.astype(float)

array([ 1.25, -9.6 , 42.  ])

In [48]:
int_array = np.arange(10)

In [44]:
# Literals written with explicit leading zeros; redundant trailing zeros dropped (same values)
calibers = np.array([0.22, 0.27, 0.357, 0.38, 0.44, 0.5], dtype=np.float64)

In [49]:
# Cast using another array's dtype attribute as the target type
int_array.astype(calibers.dtype)

array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])

In [50]:
# 'u4' is a shorthand type code; the output below reports the resulting dtype as uint32
empty_uint32 = np.empty(8, dtype='u4')

In [51]:
empty_uint32

array([        0, 805306368,         0, 805306368,        10,         0,
               0, 805306368], dtype=uint32)