# NumPy for Numerical Analysis
Numeric Python (numpy.org)

In [2]:
# pip install numpy
# pip3 install numpy
# python -m pip install numpy
# python3 -m pip install numpy
# py -m pip install numpy
# !pip install numpy # if you really have to
import numpy as np

In [3]:
dir(np)

['ALLOW_THREADS',
 'AxisError',
 'BUFSIZE',
 'CLIP',
 'DataSource',
 'ERR_CALL',
 'ERR_DEFAULT',
 'ERR_IGNORE',
 'ERR_LOG',
 'ERR_PRINT',
 'ERR_RAISE',
 'ERR_WARN',
 'FLOATING_POINT_SUPPORT',
 'FPE_DIVIDEBYZERO',
 'FPE_INVALID',
 'FPE_OVERFLOW',
 'FPE_UNDERFLOW',
 'False_',
 'Inf',
 'Infinity',
 'MAXDIMS',
 'MAY_SHARE_BOUNDS',
 'MAY_SHARE_EXACT',
 'NAN',
 'NINF',
 'NZERO',
 'NaN',
 'PINF',
 'PZERO',
 'RAISE',
 'SHIFT_DIVIDEBYZERO',
 'SHIFT_INVALID',
 'SHIFT_OVERFLOW',
 'SHIFT_UNDERFLOW',
 'ScalarType',
 'Tester',
 'TooHardError',
 'True_',
 'UFUNC_BUFSIZE_DEFAULT',
 'UFUNC_PYVALS_NAME',
 'WRAP',
 '_CopyMode',
 '_NoValue',
 '_UFUNC_API',
 '__NUMPY_SETUP__',
 '__all__',
 '__builtins__',
 '__cached__',
 '__config__',
 '__deprecated_attrs__',
 '__dir__',
 '__doc__',
 '__expired_functions__',
 '__file__',
 '__getattr__',
 '__git_version__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 '__version__',
 '_add_newdoc_ufunc',
 '_distributor_init',
 '_financial_names',
 

We find a similar function to range called arange

In [4]:
help(np.arange)

Help on built-in function arange in module numpy:

arange(...)
    arange([start,] stop[, step,], dtype=None, *, like=None)
    
    Return evenly spaced values within a given interval.
    
    ``arange`` can be called with a varying number of positional arguments:
    
    * ``arange(stop)``: Values are generated within the half-open interval
      ``[0, stop)`` (in other words, the interval including `start` but
      excluding `stop`).
    * ``arange(start, stop)``: Values are generated within the half-open
      interval ``[start, stop)``.
    * ``arange(start, stop, step)`` Values are generated within the half-open
      interval ``[start, stop)``, with spacing between values given by
      ``step``.
    
    For integer arguments the function is roughly equivalent to the Python
    built-in :py:class:`range`, but returns an ndarray rather than a ``range``
    instance.
    
    When using a non-integer step, such as 0.1, it is often better to use
    `numpy.linspace`.
    
    


In [5]:
an_array = np.arange(10)
print(an_array)
print(type(an_array))

[0 1 2 3 4 5 6 7 8 9]
<class 'numpy.ndarray'>


In [6]:
python_list = [i for i in range(10000)]
numpy_array = np.arange(10000)

In [7]:
sum(python_list)

49995000

In [8]:
np.sum(numpy_array)

49995000

The key feature of NumPy is the performance benefit coming with the arrays compared to standard Python lists. We can measure the performance difference by using timeit module.

In [9]:
import timeit
dir(timeit)

['Timer',
 '__all__',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__spec__',
 '_globals',
 'default_number',
 'default_repeat',
 'default_timer',
 'dummy_src_name',
 'gc',
 'itertools',
 'main',
 'reindent',
 'repeat',
 'sys',
 'template',
 'time',
 'timeit']

Compare standard python with NumPy

In [10]:
help(timeit.timeit)

Help on function timeit in module timeit:

timeit(stmt='pass', setup='pass', timer=<built-in function perf_counter>, number=1000000, globals=None)
    Convenience function to create Timer object and call timeit method.



In [11]:
# standard Python
standard_statement = \
'''
s = 0
for i in range(10000):
    s += i
'''
# standard Python with list comprehension
standard_plus_statement = \
'''
n = [i for i in range(10000)]
s = sum(n)
'''
# Numpy array
numpy_statement = \
'''
import numpy as np
n = np.arange(10000)
s = np.sum(n)
'''

In [15]:
timeit.timeit(standard_statement, number=1000)

0.24127004199999647

In [16]:
timeit.timeit(standard_plus_statement, number=1000)

0.16155320799998663

In [17]:
timeit.timeit(numpy_statement, number=1000)

0.012809333999996397

Where does this performance improvement come from? It is something about the way they use the memory.

In [162]:
n_standard = []
print(f"{hex(id(n_standard))} << Initial Memory Location")
for i in range(5):
    n_standard.append(i)
    print(f"{hex(id(n_standard))}", end=':')
    print(f"{[hex(id(x)) for x in n_standard]}")

0x11102cb00 << Initial Memory Location
0x11102cb00:['0x1011c40d0']
0x11102cb00:['0x1011c40d0', '0x1011c40f0']
0x11102cb00:['0x1011c40d0', '0x1011c40f0', '0x1011c4110']
0x11102cb00:['0x1011c40d0', '0x1011c40f0', '0x1011c4110', '0x1011c4130']
0x11102cb00:['0x1011c40d0', '0x1011c40f0', '0x1011c4110', '0x1011c4130', '0x1011c4150']


In [163]:
n = n_numpy = np.array([])
print(f"{hex(id(n_numpy))} << Initial Memory Location")
for i in range(5):
    n_numpy = np.append(n_numpy, np.array(i))
    print(f"{hex(id(n_numpy))}", end=':')
    print(f"{[hex(id(x)) for x in n_numpy]}")

0x111ab9950 << Initial Memory Location
0x11153d2f0:['0x111532a90']
0x105a0a4f0:['0x1115328d0', '0x111532a90']
0x105a09e90:['0x1115329b0', '0x1115328d0', '0x1115329b0']
0x105a0a4f0:['0x111532a90', '0x1115328d0', '0x111532a90', '0x1115328d0']
0x105a09e90:['0x1115329b0', '0x111532a90', '0x1115329b0', '0x111532a90', '0x1115329b0']


In [161]:
# if you want to go deeper, you can check a memory address by using the following code
#import ctypes
#g = (ctypes.c_char*40000).from_address(0x1103bbdc0)
#print(g.value.decode("utf-8", errors='ignore'))

## What makes numpy faster or slower?
Numpy arrays are stored at one continuous place in memory, so processes can access and manipulate them very efficiently.

This behavior is called locality of reference in computer science.

This is the main reason why NumPy is SOMETIMES faster than lists.

In [164]:
standard_append = \
'''
n = []
for i in range(10000):
    n.append(i)
'''
numpy_append = \
'''
import numpy as np
n = np.array([])
for i in range(10000):
    n = np.append(n, np.array(i))
'''

In [165]:
timeit.timeit(standard_append, number=100)

0.04325383400009741

In [166]:
timeit.timeit(numpy_append, number=100)

2.9533894169999257

## NumPy Arrays

0-D Array: Scalar

1-D Array: Vector

2-D Array: Matrix

N-D Array

In [167]:
a_scalar = np.array(17)
print(a_scalar, type(a_scalar))

17 <class 'numpy.ndarray'>


In [168]:
a_vector = np.array([3, 5])
print(a_vector, type(a_vector))

[3 5] <class 'numpy.ndarray'>


In [169]:
a_vector_from_tuple = np.array((3, 5, 7))
print(a_vector_from_tuple, type(a_vector_from_tuple))

[3 5 7] <class 'numpy.ndarray'>


In [170]:
a_vector_from_set = np.array({3, 3, 3, 5, 5, 5, 5, 5, 7, 7, 7, 7, 7, 7, 7})
print(a_vector_from_set, type(a_vector_from_set))

{3, 5, 7} <class 'numpy.ndarray'>


In [171]:
# indexing and slicing
print(a_vector[1])
print(a_vector_from_tuple[1:])
print(a_vector_from_set[1])

5
[5 7]


IndexError: too many indices for array: array is 0-dimensional, but 1 were indexed

In [172]:
an_array_with_chars = np.array(['a', 'Bora', 'Canbula', 7])
print(an_array_with_chars, type(an_array_with_chars))

['a' 'Bora' 'Canbula' '7'] <class 'numpy.ndarray'>


In [173]:
two_d_array = np.array([[1, 2, 3], [4, 5, 6]])
print(two_d_array, type(two_d_array))

[[1 2 3]
 [4 5 6]] <class 'numpy.ndarray'>


In [174]:
# indexing with multiple dimensions
print(two_d_array[:, 1:])
# higher dimensions
m_1 = np.array([[11, 12, 13],
                [14, 15, 16],
                [17, 18, 19]])
m_2 = np.array([[21, 22, 23],
                [24, 25, 26],
                [27, 28, 29]])
m_3 = np.array([[31, 32, 33],
                [34, 35, 36],
                [37, 38, 39]])
m = np.array([m_1, m_2, m_3])
print(m.shape, m.ndim)
print(m[:, 2, :].reshape(9))

[[2 3]
 [5 6]]
(3, 3, 3) 3
[17 18 19 27 28 29 37 38 39]


Type is not different so let's look at number of dimensions

In [175]:
print(a_scalar.ndim)
print(a_vector.ndim)
print(two_d_array.ndim)

0
1
2


Number of items in an array gives you the shape

In [176]:
print(a_scalar.shape)
print(a_vector.shape)
print(two_d_array.shape)

()
(2,)
(2, 3)


In [177]:
a_matrix = np.array([[1, 2, 3],
                     [4, 5, 6],
                     [7, 8, 9]])
print(a_matrix)
print(a_matrix.ndim)
print(a_matrix.shape)

[[1 2 3]
 [4 5 6]
 [7 8 9]]
2
(3, 3)


In [178]:
a_matrix.reshape(9, 1)
print(a_matrix)
print(a_matrix.ndim)
print(a_matrix.shape)

[[1 2 3]
 [4 5 6]
 [7 8 9]]
2
(3, 3)


In [179]:
print(a_matrix.reshape(9, 1))

[[1]
 [2]
 [3]
 [4]
 [5]
 [6]
 [7]
 [8]
 [9]]


In [181]:
np.array([[1, 2, 3], [4, 5, 6]]).shape

(2, 3)

In [182]:
n_1 = [1, 2, 3]
n_2 = [4, 5, 6]
n_3 = [7, 8, 9]
n = np.array([n_1 + n_2 + n_3])

In [183]:
n

array([[1, 2, 3, 4, 5, 6, 7, 8, 9]])

In [184]:
n = np.arange(10)
n.reshape(2, -1)

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

In [185]:
np.arange(10).reshape(2, -1)

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

In [122]:
n.reshape(10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [128]:
a = np.array([0, 1, 2, 3, 4], ndmin=2)

In [129]:
a

array([[0, 1, 2, 3, 4]])

## View and Copy Concept in NumPy

* view affected by the changes made to the original array
* copy not affected by the changes made to the original array

In [199]:
n = np.arange(10)

In [200]:
m = n.view() # just returns a view of the array
print(n)
print(m)

[0 1 2 3 4 5 6 7 8 9]
[0 1 2 3 4 5 6 7 8 9]


In [201]:
m[0] = 7 # change the values of n

In [203]:
# both of them are affected
print(n)
print(m)

[7 1 2 3 4 5 6 7 8 9]
[7 1 2 3 4 5 6 7 8 9]


In [204]:
n = np.arange(10)
m = n.copy()
print(n)
print(m)

[0 1 2 3 4 5 6 7 8 9]
[0 1 2 3 4 5 6 7 8 9]


In [205]:
m[0] = 7

In [207]:
# the original array is not affected from its copy
print(n)
print(m)

[0 1 2 3 4 5 6 7 8 9]
[7 1 2 3 4 5 6 7 8 9]


In [209]:
# you can check if an array is a view or not
n = np.arange(10)
n_view = n.view()
n_copy = n.copy()
print(n.base)
print(n_view.base) # this one returns the original array
print(n_copy.base)

None
[0 1 2 3 4 5 6 7 8 9]
None


## Data Types in Python vs NumPy

In [210]:
np.typecodes

{'Character': 'c',
 'Integer': 'bhilqp',
 'UnsignedInteger': 'BHILQP',
 'Float': 'efdg',
 'Complex': 'FDG',
 'AllInteger': 'bBhHiIlLqQpP',
 'AllFloat': 'efdgFDG',
 'Datetime': 'Mm',
 'All': '?bhilqpBHILQPefdgFDGSUVOMm'}

In [211]:
np.sctypes # scalar types?

{'int': [numpy.int8, numpy.int16, numpy.int32, numpy.int64],
 'uint': [numpy.uint8, numpy.uint16, numpy.uint32, numpy.uint64],
 'float': [numpy.float16, numpy.float32, numpy.float64],
 'complex': [numpy.complex64, numpy.complex128],
 'others': [bool, object, bytes, str, numpy.void]}

### Default Python Data Types

- strings
- integer
- float
- boolean
- complex

### Data Types in NumPy

- i integer
- b boolean
- u unsigned integer
- f float
- c complex float
- m timedelta
- M datetime
- O object
- S string
- U unicode string
- V fixed chunk of memory for other type

In [212]:
# check the data type of NumPy array
a = np.array([1, 2, 3])
a.dtype

dtype('int64')

In [213]:
# why int64? create array with a defined data type
a = np.array([1, 2, 3], dtype='i')
a.dtype

dtype('int32')

In [214]:
# why int32? try again
a = np.array([1, 2, 3], dtype=np.int8)
a.dtype

dtype('int8')

In [217]:
# check the limits
a = np.array([128], dtype=np.int8)
print(a.dtype)
print(a.dtype.name)
print(a)

int8
int8
[-128]


In [218]:
# what does unsigned mean?
a = np.array([256], dtype=np.uint8)
print(a.dtype)
print(a.dtype.name)
print(a)

uint8
uint8
[0]


In [219]:
# conversion errors
a = np.array(['a', '2', '3'], dtype='i')
print(a.dtype)
print(a)

ValueError: invalid literal for int() with base 10: 'a'

In [220]:
try:
    a = np.array(['a', '2', '3'], dtype='i')
except ValueError:
    print('There is something wrong with the values!')
finally:
    a = np.array([], dtype='i')
print(a.dtype)
print(a)

There is something wrong with the values!
int32
[]


### Initial Placeholders

In [221]:
print('Empty')
print(np.empty((4,4), dtype='f'))
print('Zeros')
print(np.zeros((2,3)))
print(np.zeros((2,2), dtype='i'))
print('Ones')
print(np.ones((2,3,4), dtype=np.complex128))
print(np.array([complex(1) for _ in range(24)]).reshape(2, 3, 4))
print('Linear Space')
print(np.linspace(1, 2, 10))
print('Filled with a default value')
print(np.full((3,3), 5))
print('Identity Matrix')
print(np.eye(3))
print('Random')
print(np.random.random((3,3)))
print(np.random.randint(0, 99, (3,3)))

Empty
[[0.     0.     0.     1.875 ]
 [0.     2.     0.     2.125 ]
 [0.     2.25   0.     2.3125]
 [0.     2.375  0.     2.4375]]
Zeros
[[0. 0. 0.]
 [0. 0. 0.]]
[[0 0]
 [0 0]]
Ones
[[[1.+0.j 1.+0.j 1.+0.j 1.+0.j]
  [1.+0.j 1.+0.j 1.+0.j 1.+0.j]
  [1.+0.j 1.+0.j 1.+0.j 1.+0.j]]

 [[1.+0.j 1.+0.j 1.+0.j 1.+0.j]
  [1.+0.j 1.+0.j 1.+0.j 1.+0.j]
  [1.+0.j 1.+0.j 1.+0.j 1.+0.j]]]
[[[1.+0.j 1.+0.j 1.+0.j 1.+0.j]
  [1.+0.j 1.+0.j 1.+0.j 1.+0.j]
  [1.+0.j 1.+0.j 1.+0.j 1.+0.j]]

 [[1.+0.j 1.+0.j 1.+0.j 1.+0.j]
  [1.+0.j 1.+0.j 1.+0.j 1.+0.j]
  [1.+0.j 1.+0.j 1.+0.j 1.+0.j]]]
Linear Space
[1.         1.11111111 1.22222222 1.33333333 1.44444444 1.55555556
 1.66666667 1.77777778 1.88888889 2.        ]
Filled with a default value
[[5 5 5]
 [5 5 5]
 [5 5 5]]
Identity Matrix
[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]
Random
[[0.47416884 0.88719115 0.33393405]
 [0.79485331 0.47398417 0.08711345]
 [0.98516842 0.97384434 0.31851779]]
[[63 36 97]
 [74 59 77]
 [54 83  0]]


## Array Operations
* Element-wise Operations
* Aggregate Operations

In [222]:
# default python list
a_py = [1, 2, 3]
b_py = [4, 5, 6]
c_py = a_py + b_py

In [223]:
# Element-wise operation: for each element
a_np = np.array([1, 2, 3])
b_np = np.array([4, 5, 6])
c_np = a_np + b_np

In [224]:
print(c_py)
print(c_np)

[1, 2, 3, 4, 5, 6]
[5 7 9]


In [225]:
c_py * 3

[1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6]

In [226]:
c_np * 3

array([15, 21, 27])

In [227]:
#c_np ** 2
#np.power(c_np, 2)
c_np + 1 # this is broadcasting (any -> np.array)

array([ 6,  8, 10])

In [228]:
c_np_like_python = np.append(a_np, b_np)
print(c_np_like_python)

[1 2 3 4 5 6]


In [229]:
# Aggregate operations
n = np.array([[1, 2, 3],
              [4, 5, 6],
              [7, 8, 9]])
print(sum(n))
print(np.sum(n))
print(np.sum(n, axis=0))
print(np.sum(n, axis=1))
#print(np.sum(n, axis=2))
print('')
try:
    print(np.sum(n, axis=2))
except np.AxisError:
    for i in range(n.ndim):
        print(np.sum(n, axis=i))
finally:
    pass

[12 15 18]
45
[12 15 18]
[ 6 15 24]

[12 15 18]
[ 6 15 24]


# Homework to Submit to GitHub Week04 folder

## Function Description

This function creates an `n-by-n` numpy array populated with random integers that have up to `d` digits. It then replaces the central `m-by-m` part of this array with `-1`.

### Parameters

- **d**: Number of digits for the random integers.
- **n**: Size of the main array.
- **m**: Size of the central array that will be replaced with `-1`.

### Returns

- A modified numpy array with its center replaced with `-1`.

### Exceptions

- **ValueError**: This exception is raised in the following scenarios:
  - If `m > n`
  - If `d <= 0`
  - If `n < 0`
  - If `m < 0`
