# NumPy Basics: Arrays and Vectorized Computation

In [None]:
%matplotlib inline

In [3]:
import numpy as np
import matplotlib.pyplot as plt

In [4]:
# Seed NumPy's legacy global random generator so the np.random.randn draws
# in later cells are repeatable when the notebook is run top to bottom.
np.random.seed(12345)

In [None]:
# help(np.set_printoptions)

In [6]:
# The same one million integers stored two ways, as a NumPy array and as a
# plain Python list, so the timing cell further down can compare them.
my_arr = np.arange(1000000)
my_list = list(range(1000000))
my_arr

array([     0,      1,      2, ..., 999997, 999998, 999999])

# WHY NOT LISTS

Lists can hold heterogeneous data types, while a NumPy array cannot. So why use NumPy arrays instead of lists?

1. Lower memory consumption.
2. NumPy arrays are much faster than lists for numerical work (see the timing comparison below).
3. Some operations cannot be performed on lists but can be on NumPy arrays (such as element-wise division of two arrays).

In [7]:
# Time ten rounds of doubling every element: a single vectorized array
# multiplication versus a Python-level list comprehension.
%time for _ in range(10): my_arr2 = my_arr * 2
%time for _ in range(10): my_list2 = [x * 2 for x in my_list]

CPU times: user 32.7 ms, sys: 8.18 ms, total: 40.9 ms
Wall time: 40.3 ms
CPU times: user 662 ms, sys: 159 ms, total: 821 ms
Wall time: 820 ms


## The NumPy ndarray: A Multidimensional Array Object

In [8]:
# Generate some random data: a 2x3 array of standard-normal samples
data = np.random.randn(2, 3)
# End the cell on the bare name so the notebook echoes the array;
# an assignment on its own produces no output.
data


array([[-0.20470766,  0.47894334, -0.51943872],
       [-0.5557303 ,  1.96578057,  1.39340583]])

In [9]:
# Arithmetic with a scalar, or with another array of the same shape,
# is applied element by element.
print(data * 10)
data + data

[[-2.04707659  4.78943338 -5.19438715]
 [-5.55730304 19.65780573 13.93405833]]


array([[-0.40941532,  0.95788668, -1.03887743],
       [-1.11146061,  3.93156115,  2.78681167]])

In [10]:
# shape gives the size along each dimension; dtype is the single
# element type shared by every entry of the array.
print(data.shape)
data.dtype

(2, 3)


dtype('float64')

### Creating ndarrays

In [11]:
# np.array converts a Python sequence into an ndarray. The list mixes ints
# with one float, and the result is a float array (see the echoed output).
data1 = [6, 7.5, 8, 0, 1]
arr1 = np.array(data1)
arr1

array([6. , 7.5, 8. , 0. , 1. ])

In [12]:
# Two equal-length inner lists become the two rows of a 2-D array.
data2 = [
    [1, 2, 3, 4],
    [5, 6, 7, 8],
]
arr2 = np.array(data2)
arr2

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

# DIY

## If the shape of arr2 is m x n, find m and n

In [17]:
# An ndarray holds a single element type. Mixing ints with a str makes
# np.array convert everything to strings (note the '<U21' dtype in the output).
list1 = [1,2,'a']
npArray = np.array(list1)
npArray

array(['1', '2', 'a'], dtype='<U21')

In [13]:
# ndim is the number of dimensions; shape lists the length of each one.
print(arr2.ndim)
arr2.shape

2


(2, 4)

In [14]:
# np.array picked a dtype from the input data: floating point for arr1,
# integer for arr2.
print(arr1.dtype)
print(arr2.dtype)

float64
int64


In [18]:
# zeros takes a length (1-D) or a shape tuple (N-D).
print(np.zeros(10))
# NOTE: this expression is evaluated but not displayed; a notebook only
# echoes the last expression of a cell.
np.zeros((3, 6))
# np.empty allocates without initializing, so the values echoed below are
# whatever happened to be in memory, not guaranteed zeros.
np.empty((2, 3, 2))

[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]


array([[[6.89938806e-310, 4.68261395e-310],
        [0.00000000e+000, 0.00000000e+000],
        [0.00000000e+000, 0.00000000e+000]],

       [[0.00000000e+000, 0.00000000e+000],
        [0.00000000e+000, 0.00000000e+000],
        [0.00000000e+000, 0.00000000e+000]]])

# What would the method below do?

In [None]:
# Exercise: predict the printed result before running this cell.
print(np.arange(15))

### Arithmetic with NumPy Arrays

In [19]:
# Arithmetic between arrays of equal shape is applied element by element.
arr = np.array([[1., 2., 3.], [4., 5., 6.]])
# NOTE: only the final expression (arr - arr) is echoed by the notebook;
# wrap the other two in print() to see their values.
arr
arr * arr
arr - arr

array([[0., 0., 0.],
       [0., 0., 0.]])

In [20]:
# A scalar operand is applied to every element of the array.
# NOTE: only the last line (the square roots) is echoed.
1 / arr
arr ** 0.5

array([[1.        , 1.41421356, 1.73205081],
       [2.        , 2.23606798, 2.44948974]])

In [22]:
print(arr)
# NOTE: this rebinds arr2, replacing the integer array created earlier.
arr2 = np.array([[0., 4., 1.], [7., 2., 12.]])
# Bare mid-cell expression: evaluated but not displayed.
arr2
# Comparing equal-shape arrays yields a boolean array of that same shape.
arr2 > arr

[[1. 2. 3.]
 [4. 5. 6.]]


array([[False,  True, False],
       [ True, False,  True]])

### Basic Indexing and Slicing

In [23]:
arr = np.arange(10)
# The next three bare expressions are evaluated but not displayed;
# only the final `arr` is echoed.
arr
arr[5]
arr[5:8]
# Assigning a scalar to a slice writes that value into every selected
# position of arr, in place.
arr[5:8] = 12
arr

array([ 0,  1,  2,  3,  4, 12, 12, 12,  8,  9])

In [24]:
# A slice is a view onto arr, not a copy; the following cells write
# through it and the changes show up in arr.
arr_slice = arr[5:8]
arr_slice

array([12, 12, 12])

In [25]:
# Writing through the view changes the original array as well.
arr_slice[1] = 12345
arr

array([    0,     1,     2,     3,     4,    12, 12345,    12,     8,
           9])

In [26]:
# A bare [:] selects every element of the view, so all three underlying
# entries of arr become 64.
arr_slice[:] = 64
arr

array([ 0,  1,  2,  3,  4, 64, 64, 64,  8,  9])

In [27]:
# A 3x3 integer grid; a single integer index then selects one whole row.
arr2d = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])
arr2d[2]

array([7, 8, 9])

In [28]:
# Two equivalent ways to reach a single element: chained indexing, or a
# comma-separated index pair. Only the second expression is echoed.
arr2d[0][2]
arr2d[0, 2]

3

In [29]:
# A 2x2x3 array: two blocks, each holding two rows of three integers.
arr3d = np.array([
    [[1, 2, 3], [4, 5, 6]],
    [[7, 8, 9], [10, 11, 12]],
])
arr3d

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [30]:
# Indexing the 3-D array with one integer returns the 2-D sub-array
# at that position.
arr3d[0]

array([[1, 2, 3],
       [4, 5, 6]])

In [None]:
# Keep an independent copy of the first block so it can be restored below.
old_values = arr3d[0].copy()
# A scalar assigned to arr3d[0] is written into every element of that block.
arr3d[0] = 42
# print() the intermediate state: a bare expression here would be silently
# discarded, because a notebook only echoes the last expression of a cell.
print(arr3d)
# An array can be assigned as well: put the saved values back.
arr3d[0] = old_values
arr3d

In [None]:
# Two integer indices into the 3-D array select a single 1-D row.
arr3d[1, 0]

In [None]:
# Index in two steps: first take the second 2-D block ...
x = arr3d[1]
# ... and print() it, since a bare mid-cell expression is not displayed ...
print(x)
# ... then take that block's first row (the cell's echoed result).
x[0]

#### Indexing with slices

In [31]:
# 1-D arrays slice with the same start:stop syntax as Python lists.
# NOTE: the bare `arr` is not displayed; only the slice below is echoed.
arr
arr[1:6]

array([ 1,  2,  3,  4, 64])

In [33]:
# A single slice on a 2-D array selects along the first axis:
# here, the first two rows.
print(arr2d)
arr2d[:2]

[[1 2 3]
 [4 5 6]
 [7 8 9]]


array([[1, 2, 3],
       [4, 5, 6]])

In [40]:
# One slice per axis: the first two rows, columns 1 onward.
arr2d[:2, 1:]

array([[2, 3],
       [5, 6]])

In [35]:
# Mixing an integer index with a slice drops a dimension:
# row 1, first two columns, returned as a 1-D array.
arr2d[1, :2]

array([4, 5])

In [36]:
# Column 2 of the first two rows, returned as a 1-D array.
arr2d[:2, 2]

array([3, 6])

In [37]:
# A bare colon takes the whole axis. Slicing the column (rather than
# indexing it with an integer) keeps the result two-dimensional.
arr2d[:, :1]

array([[1],
       [4],
       [7]])

In [38]:
# Assigning to a slice expression writes to the whole selection of
# arr2d in place.
arr2d[:2, 1:] = 0
arr2d

array([[1, 0, 0],
       [4, 0, 0],
       [7, 8, 9]])

### Boolean Indexing

In [None]:
# Seven names (with repeats) alongside a 7x4 block of random data; the
# following cells use comparisons on names to pick rows of data.
# NOTE: this rebinds `data`, replacing the 2x3 array created earlier.
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
data = np.random.randn(7, 4)
# print() names explicitly: a bare mid-cell expression is not displayed,
# only the final `data` is echoed.
print(names)
data

In [None]:
# Comparing the array with a string is vectorized: one boolean per name.
names == 'Bob'

In [None]:
# A boolean array used as an index keeps the rows of data where the
# mask is True.
data[names == 'Bob']

In [None]:
# Two ways to express the opposite condition: != in the comparison,
# or ~ applied to the resulting boolean mask.
print(names != 'Bob')
data[~(names == 'Bob')]

In [None]:
# Naming the mask first lets ~ invert it directly.
cond = names == 'Bob'
data[~cond]

In [None]:
# Boolean-mask assignment: every negative entry of data is overwritten
# with 0, in place.
data[data < 0] = 0
data

### Transposing Arrays and Swapping Axes

In [41]:
# Lay out 0..14 as 3 rows of 5, then show the transposed (5x3)
# arrangement via the .T attribute.
arr = np.arange(15).reshape((3, 5))
print(arr)
arr.T

[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]]


array([[ 0,  5, 10],
       [ 1,  6, 11],
       [ 2,  7, 12],
       [ 3,  8, 13],
       [ 4,  9, 14]])

In [43]:
arr = np.random.randn(6, 3)
print(arr)
# Inner matrix product X^T X. np.dot(arr, arr) raises ValueError here,
# because a (6, 3) by (6, 3) product has mismatched inner dimensions;
# transposing the left operand to (3, 6) gives a valid (3, 3) result.
np.dot(arr.T, arr)

In [44]:
arr = np.arange(16).reshape((2, 2, 4))
# Bare mid-cell expression: evaluated but not displayed.
arr
# transpose takes a permutation of the axis numbers: here the first two
# axes trade places and the last axis stays where it is.
arr.transpose((1, 0, 2))

array([[[ 0,  1,  2,  3],
        [ 8,  9, 10, 11]],

       [[ 4,  5,  6,  7],
        [12, 13, 14, 15]]])

In [45]:
# swapaxes exchanges the two named axes: shape (2, 2, 4) becomes (2, 4, 2).
arr.swapaxes(1, 2)

array([[[ 0,  4],
        [ 1,  5],
        [ 2,  6],
        [ 3,  7]],

       [[ 8, 12],
        [ 9, 13],
        [10, 14],
        [11, 15]]])

### Expressing Conditional Logic as Array Operations

In [None]:
# Two value arrays plus a boolean selector, all of length 5, used by the
# conditional-selection cells that follow.
xarr = np.array([1.1, 1.2, 1.3, 1.4, 1.5])
yarr = np.array([2.1, 2.2, 2.3, 2.4, 2.5])
cond = np.array([True, False, True, True, False])

In [None]:
# Pure-Python conditional pick: walk the three arrays in lockstep, keeping
# the xarr value where the selector is True and the yarr value otherwise.
result = [
    x_val if take_x else y_val
    for take_x, x_val, y_val in zip(cond, xarr, yarr)
]
result

In [None]:
# Vectorized selection: take the element from xarr where cond is True,
# otherwise the element from yarr.
result = np.where(cond, xarr, yarr)
result

### Mathematical and Statistical Methods

In [49]:
arr = np.random.randn(5, 4)
print(arr)
# Whole-array aggregations to try; uncomment one line at a time.
# The method and the top-level function compute the same mean.
# arr.mean()
# np.mean(arr)
# arr.sum()

[[ 0.05000936  0.67021559  0.85296503 -0.95586885]
 [-0.02349332 -2.30423388 -0.65246884 -1.21830198]
 [-1.33260971  1.07462269  0.72364151  0.69000185]
 [ 1.00154344 -0.50308739 -0.62227423 -0.92116861]
 [-0.72621349  0.22289555  0.0513161  -1.15771947]]


In [50]:
# axis=1 sums across the columns (one total per row, 5 values);
# axis=0 sums down the rows (one total per column, 4 values).
print(arr.sum(axis=1))
print(arr.sum(axis=0))

[ 0.61732113 -4.19849802  1.15565634 -1.04498678 -1.60972131]
[-1.03076372 -0.83958744  0.35317957 -3.56305705]


In [51]:
# cumsum returns the running total at each position rather than
# reducing to a single value.
arr = np.array([0, 1, 2, 3, 4, 5, 6, 7])
arr.cumsum()

array([ 0,  1,  3,  6, 10, 15, 21, 28])

In [None]:
arr = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8]])
# print() the first two results: a notebook only echoes the final
# expression of a cell, so bare mid-cell expressions would be lost.
print(arr)
# Running sum down each column.
print(arr.cumsum(axis=0))
# Running product along each row.
arr.cumprod(axis=1)

### Methods for Boolean Arrays

In [52]:
# Summing a boolean array counts its True entries (True counts as 1).
arr = np.random.randn(100)
(arr > 0).sum() # Number of positive values

47

In [56]:
# any() is True when at least one element is True; all() only when every
# element is. With an all-False array both print False.
bools = np.array([False, False, False, False])
print(bools.any())
# Prints blank lines to separate the two results.
print("\n")
print(bools.all())

False


False


### Sorting

In [58]:
arr = np.random.randn(6)
print(arr)
# The sort() method reorders arr in place, so the result is shown by
# printing arr again rather than by echoing the call.
arr.sort()
print(arr)

[-0.39570075 -0.2894359  -0.73429707 -0.72850468  0.83877507  0.26689321]
[-0.73429707 -0.72850468 -0.39570075 -0.2894359   0.26689321  0.83877507]


### Unique and Other Set Logic

In [59]:
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
# np.unique returns the distinct values in sorted order.
print(np.unique(names))
ints = np.array([3, 3, 3, 2, 2, 1, 1, 4, 4])
np.unique(ints)

['Bob' 'Joe' 'Will']


array([1, 2, 3, 4])

In [None]:
# Pure-Python counterpart of np.unique(names): de-duplicate with a set,
# then sort the survivors.
sorted(set(names))

## Conclusion