Python Data Science Handbook by Jake VanderPlas 
[Link](https://jakevdp.github.io/PythonDataScienceHandbook/)

# Chapter: Introduction to NumPy

## Creating arrays from scratch

In [1]:
#Library 
import numpy as np

In [2]:
#create length-10 integer array filled with zeros
np.zeros(10, dtype = int)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [3]:
#create a 3x5 floating-point array filled with ones
np.ones((3,5), dtype = float)

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [4]:
#create a 3x5 array filled with 3.14
np.full((3,5), 3.14)

array([[3.14, 3.14, 3.14, 3.14, 3.14],
       [3.14, 3.14, 3.14, 3.14, 3.14],
       [3.14, 3.14, 3.14, 3.14, 3.14]])

In [5]:
# Create an array filled with a linear sequence
# Starting at 0, stopping before 20 (the end value is excluded), stepping by 2
np.arange(0, 20, 2)

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [6]:
# Create an array of five values evenly spaced between 0 and 1
np.linspace(0, 1, 5)

array([0.  , 0.25, 0.5 , 0.75, 1.  ])

In [2]:
# Create a 3x3 array of uniformly distributed
# random values between 0 and 1
np.random.random((3,3))

array([[0.56714539, 0.81295775, 0.58304396],
       [0.74690648, 0.0810279 , 0.39382211],
       [0.12988581, 0.24618314, 0.79799673]])

In [3]:
# Create a 3x3 array of normally distributed random values
# with mean 0 and standard deviation 1
np.random.normal(0, 1, (3,3))

array([[-0.84585656, -2.22241475,  0.82754861],
       [-0.3039013 , -0.25228727,  0.21363343],
       [-1.20697908, -0.04491187,  1.49029385]])

In [5]:
# Create a 3x3 array of random integers in the interval [0, 10)
np.random.randint(0, 10, (3,3))

array([[0, 0, 7],
       [2, 5, 2],
       [7, 0, 8]])

In [6]:
# Create a 3x3 identity matrix
np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [7]:
# Create an uninitialized array of three values (floats, since no dtype is given)
# The values will be whatever happens to already exist at that memory location

np.empty(3)

array([1., 1., 1.])

## NumPy Array Attributes

In [8]:
import numpy as np
np.random.seed(0) 

In [9]:
# Three arrays of random integers drawn from [0, 10), one each of rank 1, 2 and 3.
# The values come from the global NumPy random stream, so the order of these
# three calls determines which numbers each array receives; keep them in this order.
x1 = np.random.randint(10, size=6) # 1D array
x2 = np.random.randint(10, size=(3,4)) # 2D array
x3 = np.random.randint(10, size=(3,4,5)) # 3D array

In [14]:
print("x3 ndim: ", x3.ndim)
print("x3 shape: ", x3.shape)
print("x3 size: ", x3.size)
print("x3 dtype: ", x3.dtype)
print("itemsize: ", x3.itemsize, "bytes")
print("nbytes: ", x3.nbytes, "bytes")

x3 ndim:  3
x3 shape:  (3, 4, 5)
x3 size:  60
x3 dtype:  int64
itemsize:  8 bytes
nbytes:  480 bytes


### Array Indexing: Accessing Single Elements

In [19]:
# Indexing is zero-based: x1[0] is the first element, so x1[4] is the fifth.
print("1D array:", x1)
print("first index: ", x1[0])
print("fourth index: ", x1[4]) # NOTE: index 4 is the fifth element, despite the label
print("index at end of array: ", x1[-1]) # negative indices count back from the end
print("2nd to last index of array: ", x1[-2])

1D array: [5 0 3 3 7 9]
first index:  5
fourth index:  7
index at end of array:  9
2nd to last index of array:  7


In [20]:
print("2D array: ", x2)
# access items using comma-separated tuple of indices

2D array:  [[3 5 2 4]
 [7 6 8 8]
 [1 6 7 7]]


In [21]:
x2[0,0]

3

In [22]:
x2[2,0]

1

In [23]:
x2[2,-1]

7

In [24]:
# modify values
x2[0,0] = 12
x2

array([[12,  5,  2,  4],
       [ 7,  6,  8,  8],
       [ 1,  6,  7,  7]])

### Array Slicing: Accessing Subarrays

In [None]:
# x[start:stop:step]
# start=0, stop=size of dimension, step=1

##### One-dimensional subarrays

In [27]:
x = np.arange(10)

In [28]:
# first 5 elements
x[:5]

array([0, 1, 2, 3, 4])

In [29]:
# elements from index 5 onward (index 5 is included)
x[5:]

array([5, 6, 7, 8, 9])

In [30]:
# middle subarray (indices 4 through 6)
x[4:7]

array([4, 5, 6])

In [31]:
# every other element
x[::2]

array([0, 2, 4, 6, 8])

In [32]:
# every other element starting at index 1
x[1::2]

array([1, 3, 5, 7, 9])

In [33]:
# reverse an array
x[::-1]

array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])

In [34]:
# reversed every other from index 5
x[5::-2]

array([5, 3, 1])

### Multi-dimensional subarrays

In [35]:
# 2D array
x2

array([[12,  5,  2,  4],
       [ 7,  6,  8,  8],
       [ 1,  6,  7,  7]])

In [36]:
#show two rows, three cols
x2[:2, :3]

array([[12,  5,  2],
       [ 7,  6,  8]])

In [41]:
# all rows, every other column
x2[:3, ::2]

array([[12,  2],
       [ 7,  8],
       [ 1,  7]])

In [42]:
# reverse subarray
x2[::-1, ::-1]

array([[ 7,  7,  6,  1],
       [ 8,  8,  6,  7],
       [ 4,  2,  5, 12]])

In [43]:
x2[1:2, 1:2]

array([[6]])

In [55]:
x2[1::2, 1::2]
# indexing form: array[row_slice, col_slice]
# each slice is start:stop:step

array([[6, 8]])

### accessing array rows and columns

One commonly needed routine is accessing single rows or columns of an array. This can be done by combining indexing and slicing, using an empty slice marked by a single colon (`:`):

In [56]:
# first col of x2
print(x2[:, 0])

[12  7  1]


In [57]:
# first row of x2
print(x2[0, :])

[12  5  2  4]


In [58]:
#for row access, can drop col
print(x2[2])

[1 6 7 7]


### Subarrays as no copy views
One important–and extremely useful–thing to know about array slices is that they return views rather than copies of the array data. This is one area in which NumPy array slicing differs from Python list slicing: in lists, slices will be copies.

In [59]:
print(x2)

[[12  5  2  4]
 [ 7  6  8  8]
 [ 1  6  7  7]]


In [60]:
#Let's extract a 2×2 subarray from this:
sub_x2 = x2[:2, :2]

In [61]:
print(sub_x2)

[[12  5]
 [ 7  6]]


In [63]:
# modifying the subarray also modifies the original array, because the slice is a view
sub_x2[1,1] = 22
print(sub_x2)

[[12  5]
 [ 7 22]]


In [64]:
print(x2)

[[12  5  2  4]
 [ 7 22  8  8]
 [ 1  6  7  7]]


### creating copies of arrays 
Using the copy() method

In [67]:
x2_sub_copy = x2[:2, :2].copy()
print(x2_sub_copy)

[[12  5]
 [ 7 22]]


In [66]:
x2_sub_copy[1:2] = 55
print(x2_sub_copy)

[[12  5]
 [55 55]]


In [68]:
print(x2) #unmodified

[[12  5  2  4]
 [ 7 22  8  8]
 [ 1  6  7  7]]


# Reshaping arrays
Using the **reshape** method

In [71]:
# Arrange the integers 1 through 9 into a 3x3 grid (filled row by row)
a = np.reshape(np.arange(1, 10), (3, 3))
print(a)

[[1 2 3]
 [4 5 6]
 [7 8 9]]


#### Notes
- the size of the initial array must match the size of the reshaped array
- where possible, reshape returns a no-copy view of the initial array (non-contiguous data may force a copy)
- a common reshaping pattern is converting a 1D array into a 2D row or column matrix
- this can also be done with *np.newaxis* inside a slice instead of *reshape*

In [72]:
x = np.array([1,2,3])

# row vector via reshape
x.reshape((1,3))

array([[1, 2, 3]])

In [73]:
# row vector via new axis
x[np.newaxis, :]

array([[1, 2, 3]])

In [74]:
# column vector via reshape
x.reshape((3,1))

array([[1],
       [2],
       [3]])

In [75]:
# column vector via new axis
x[:, np.newaxis]

array([[1],
       [2],
       [3]])

### Array concatenation and splitting

### Concatenation of arrays
- aka joining of two arrays in NumPy
- uses routines **np.concatenate** , **np.vstack**, **np.hstack**

In [83]:
x = np.array([1,2,3])
y = np.array([8,9,10])
z = np.array([16,17,18])
np.concatenate([x,y,z])

array([ 1,  2,  3,  8,  9, 10, 16, 17, 18])

In [84]:
#2D concatenate
grid = np.array([[1,2,3],
                 [4,5,6]])

In [85]:
#concatenate along first axis
np.concatenate([grid, grid])


array([[1, 2, 3],
       [4, 5, 6],
       [1, 2, 3],
       [4, 5, 6]])

In [86]:
# concatenate across second axis (zero-indexed)
np.concatenate([grid, grid], axis = 1)

array([[1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6]])

For working with arrays of mixed dimensions, it can be clearer to use the np.vstack (vertical stack) and np.hstack (horizontal stack) functions:

In [87]:
# 1D array to stack with the grid. This has to be an assignment so that the
# cell defines x itself instead of relying on an x left over from an earlier cell.
x = np.array([1, 2, 3])
# 2x3 grid; its row length matches len(x), so x can be stacked on top of it
grid = np.array([[10, 11, 12],
                 [12, 14, 15]])

In [88]:
#vertically stack arrays
np.vstack([x, grid])

array([[ 1,  2,  3],
       [10, 11, 12],
       [12, 14, 15]])

In [90]:
# horizontal stack
y = np.array([[99],
              [99]])
np.hstack([grid, y])

array([[10, 11, 12, 99],
       [12, 14, 15, 99]])

- similarly, **np.dstack** will stack arrays along the third axis

### splitting arrays
- using functions **np.split**, **np.hsplit**, **np.vsplit**

In [91]:
x = [1, 2, 3, 99, 99, 3, 2, 1]

In [102]:
# Split x at indices 3 and 5, giving three pieces: x[:3], x[3:5] and x[5:].
# NOTE(review): this rebinds x1, x2 and x3, which earlier cells defined as the
# 1D/2D/3D random arrays; cells above that use those names will behave
# differently if they are re-run after this cell.
x1, x2, x3 = np.split(x, [3,5])
print(x1, x2,x3)

[1 2 3] [99 99] [3 2 1]


In [104]:
grid = np.arange(16).reshape((4,4))
grid

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [105]:
upper, lower = np.vsplit(grid, [2])

In [108]:
print(upper)
print(lower)

[[0 1 2 3]
 [4 5 6 7]]
[[ 8  9 10 11]
 [12 13 14 15]]


In [109]:
left, right = np.hsplit(grid, [2])
print(left)
print(right)

[[ 0  1]
 [ 4  5]
 [ 8  9]
 [12 13]]
[[ 2  3]
 [ 6  7]
 [10 11]
 [14 15]]


- similarly, **np.dsplit** will split arrays along the third axis

## Computation on NumPy Arrays: Universal Functions

In [110]:
import numpy as np
np.random.seed(0)

def compute_reciprocals(values):
    """Return the reciprocal (1 / v) of every entry of ``values``.

    The result is built with an explicit Python loop, one element at a time.

    Parameters
    ----------
    values : sequence supporting ``len()`` and integer indexing
        The numbers to invert.

    Returns
    -------
    numpy.ndarray
        Array with ``len(values)`` entries, where entry ``i`` is
        ``1.0 / values[i]``.
    """
    n_items = len(values)
    # Allocate the result up front; every slot is overwritten in the loop below.
    reciprocals = np.empty(n_items)
    for idx in range(n_items):
        reciprocals[idx] = 1.0 / values[idx]
    return reciprocals
        
values = np.random.randint(1, 10, size=5)
compute_reciprocals(values)

array([0.16666667, 1.        , 0.25      , 0.25      , 0.125     ])