# Intro to Numpy Chapter from the Python DS Handbook

In [6]:
!pip install numpy



In [10]:
import numpy as np

## Understanding Data Types in Python

In [13]:
# Sum the integers 0 through 99 with an explicit Python loop
result = 0
for i in range(100):
    result += i

In [15]:
# Build a plain Python list of the integers 0-9; the bare name displays it
L = list(range(10))
L

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [16]:
# Check the type of the first element of L
type(L[0])

int

In [17]:
# Convert every element of L to its string form; the bare name displays the new list
L2 = [str(c) for c in L]
L2

['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']

In [18]:
# Check the type of the first element of the string list
type(L2[0])

str

In [19]:
# A single Python list can mix element types: here bool, str, float, and int
L3 = [True, "2", 3.0, 4]
[type(item) for item in L3] # Heterogeneous list

[bool, str, float, int]

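To allow flexible types, each item in a list must be a complete Python object that carries its own type information, reference count, and other metadata. When every item has the same type, much of this information is redundant, so it is more efficient to store the data in a fixed-type array. An array essentially holds a single pointer to one contiguous block of data, whereas a list holds a pointer to a block of pointers, each of which points to a full Python object. Lists are type-flexible, but fixed-type arrays are more efficient for storing and manipulating data.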

### Fixed-Type Arrays in Python

In [20]:
import array

In [21]:
# Build a fixed-type array from a plain list using the standard-library array module
L = list(range(10))
A = array.array('i',L) # 'i' is a typecode indicating the contents are integers
A # A is a basic Python array.array object; the bare name displays it

array('i', [0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [22]:
# Build a NumPy array from a list of Python integers
np.array([1, 2, 3, 4, 5])

array([1, 2, 3, 4, 5])

In [23]:
# A NumPy array holds a single dtype; when the inputs are mixed, NumPy
# upcasts where possible (here the integers become floats):
np.array([3.14, 4, 2, 3])

array([3.14, 4.  , 2.  , 3.  ])

In [24]:
# Use the dtype keyword to set the element type of the result explicitly
np.array([1, 2, 3, 4, 5], dtype=np.float32)

array([1., 2., 3., 4., 5.], dtype=float32)

In [25]:
# NumPy arrays can be multi-dimensional (like matrices): each inner range
# becomes one row of the resulting two-dimensional array
np.array([range(first, first + 3) for first in (2, 4, 5)])

array([[2, 3, 4],
       [4, 5, 6],
       [5, 6, 7]])

### Creating Arrays from Scratch

In [27]:
# Ten zeros stored as integers
np.zeros(shape=10, dtype=int)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [28]:
# Three rows by five columns of floating-point ones
np.ones(shape=(3, 5), dtype=float)

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [29]:
# Three rows by five columns, every entry set to 3.14
np.full(shape=(3, 5), fill_value=3.14)

array([[3.14, 3.14, 3.14, 3.14, 3.14],
       [3.14, 3.14, 3.14, 3.14, 3.14],
       [3.14, 3.14, 3.14, 3.14, 3.14]])

In [30]:
# Linear sequence: start at 0, stop before 20, advance in steps of 2
# (the array counterpart of the built-in range() function)
np.arange(start=0, stop=20, step=2)

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [31]:
# Five evenly spaced values from 0 to 1, both endpoints included
np.linspace(start=0, stop=1, num=5)

array([0.  , 0.25, 0.5 , 0.75, 1.  ])

In [32]:
# 3x3 array of random floats drawn uniformly from [0, 1)
np.random.random(size=(3, 3))

array([[0.14948278, 0.42302328, 0.39438639],
       [0.25099023, 0.21376105, 0.61499804],
       [0.87313819, 0.41635226, 0.02228409]])

In [33]:
# 3x3 array of draws from a normal distribution
# centered at 0 (loc) with standard deviation 1 (scale)
np.random.normal(loc=0, scale=1, size=(3, 3))

array([[-0.79074848, -1.58148111,  0.29329433],
       [ 0.0948431 , -1.32165123,  0.25485223],
       [ 0.32921448, -0.13628748,  2.06258279]])

In [34]:
# 3x3 array of random integers from the half-open interval [0, 10)
np.random.randint(low=0, high=10, size=(3, 3))

array([[5, 8, 5],
       [3, 2, 7],
       [0, 4, 2]])

In [35]:
# Identity matrix with three rows and three columns
np.eye(N=3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [36]:
# Create an uninitialized array of three floats (np.empty defaults to float64 when no dtype is given)
# The values will be whatever happens to already exist at that memory location
np.empty(3)

array([1., 1., 1.])

### NumPy Standard Data Types

In [37]:
# Request 16-bit integers by passing the dtype as a string name
np.zeros(shape=10, dtype="int16")

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int16)

In [38]:
# Request 16-bit integers with the NumPy type object; equivalent to dtype="int16"
np.zeros(shape=10, dtype=np.int16)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int16)

## Basics of NP Arrays

Data manipulation in Python is nearly synonymous with NumPy array manipulation; even Pandas is built around the NumPy array. The following covers the building blocks of NumPy arrays. The basics of array manipulation:
- *Attributes of Arrays*: Determining size, shape, memory consumption, and data types of arrays
- *Indexing of Arrays*: Getting and setting values of array elements
- *Slicing of Arrays*: Getting and setting smaller subarrays within a larger array
- *Reshaping of Arrays*: Changing the shape of a given array
- *Joining and Splitting of Arrays*: Combining multiple arrays into one, and splitting one array into many

### NumPy Array Attributes

In [40]:
# Seed NumPy's global random generator so the arrays below are reproducible
np.random.seed(0)

# Random integers drawn from [0, 10) in one, two, and three dimensions
x1 = np.random.randint(0, 10, size=6)
x2 = np.random.randint(0, 10, size=(3, 4))
x3 = np.random.randint(0, 10, size=(3, 4, 5))

# Every array exposes ndim (number of dimensions), shape (size of each dimension), and size (total number of elements)

In [41]:
# Report x3's number of dimensions, the length of each dimension, and its total element count
print("x3 ndim: ", x3.ndim)
print("x3 shape:", x3.shape)
print("x3 size: ", x3.size)

x3 ndim:  3
x3 shape: (3, 4, 5)
x3 size:  60


In [42]:
# Report the data type of x3's elements
print("dtype:", x3.dtype)

dtype: int64


In [43]:
# itemsize is the size of a single element in bytes; nbytes is the size of the whole array in bytes
print("itemsize:", x3.itemsize, "bytes")
print("nbytes:", x3.nbytes, "bytes") # itemsize * size

itemsize: 8 bytes
nbytes: 480 bytes


### Array Indexing: Accessing Single Elements

In [44]:
# Display the one-dimensional array x1
x1

array([5, 0, 3, 3, 7, 9])

In [49]:
# Non-negative indices count from the start (0 is the first element); negative indices count from the end (-1 is the last)
print(x1[0], x1[4], x1[-1], x1[-2])

5 7 9 7


In [53]:
# Elements of a multi-dimensional array are accessed with a comma-separated
# (row, column) index; negative indices count from the end of that axis
print(x2)
print(x2[0,0])
print(x2[2,0])
print(x2[2,-1])

[[3 5 2 4]
 [7 6 8 8]
 [1 6 7 7]]
3
1
7


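- Unlike Python lists, NumPy arrays have a fixed type: assigning a value of a different type to an element silently converts it to the array's dtype. For example, storing a floating-point value in an integer array silently truncates it (`x1[0] = 3.14159` stores `3`).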