# Live Demo 04 - Numpy

Numpy import statement

In [None]:
import numpy as np

### Array initialization

#### 1-D case

In [None]:
arr = np.array([1, 2, 3])
arr

#### 2-D case

In [None]:
brr = np.array([[1.1, 2.2, 6.6],[3.3, 4.4, 7.7]])
brr

#### Dimensions and shape

- ndim = the number of dimensions
- shape = the length of each dimension
- size = number of elements in the array

In [None]:
print(arr.ndim)
print(brr.ndim)

In [None]:
brr.shape

In [None]:
brr.size

#### Data Type

In [None]:
arr.dtype

In [None]:
brr.dtype

In [None]:
arr = arr.astype('int8')
arr.dtype

In [None]:
# size in bytes of each element
arr.itemsize

### Creation with Arange: 

In [None]:
arr = np.arange(0, 100)
arr

#### Element access and slicing

In [None]:
arr[5]

In [None]:
arr[-1:0:-5]

In [None]:
arr = np.arange(0, 100, 2)

In [None]:
arr = np.arange(0, 100, 0.25)

In [None]:
arr = np.arange(0, 100)
len(arr)

In [None]:
arr.shape

In [None]:
np.reshape?

**Exercise:** reshape the `arr` array into a $10\times10$ and a $20\times5$ matrix

In [None]:
matrix = arr.reshape(10, 10)
matrix.shape

In [None]:
len(matrix)

In [None]:
matrix = arr.reshape(20, -1)
matrix.shape

In [None]:
# Reshape the flat array into a 3-D array; -1 lets NumPy infer the length of
# the last axis from the total number of elements.
matrix_3d = arr.reshape(5, 5, -1)
# Summarize the main array attributes in one line (f-string, consistent with
# the other formatted prints in this notebook).
print(
    f'matrix_3d: ndim = {matrix_3d.ndim}, shape = {matrix_3d.shape}, '
    f'size = {matrix_3d.size}, dtype = {matrix_3d.dtype}'
)

In [None]:
matrix

In [None]:
matrix_3d

### Accessing elements and slicing in 3D

In [None]:
matrix_3d[0,:,:]

In [None]:
matrix_3d[0,1,:]

In [None]:
matrix_3d[0,1,3]

In [None]:
matrix_3d[:,0,:]

In [None]:
matrix_3d[:,:,0]

In [None]:
first_frame, second_frame = matrix_3d[:,:,0], matrix_3d[:,:,1]
first_frame

In [None]:
first_frame.shape

In [None]:
# slicing
first_frame[1:3, 1:4]

### Transposing an array

In [None]:
transposed_frame = first_frame.T
transposed_frame

In [None]:
transposed_frame = first_frame.swapaxes(0,1)
transposed_frame

## Stacking, concatenating, filtering

In [None]:
arr1 = np.arange(10, 30, 2)
arr1

**Exercise:** initialize a 10-element 1-D array `arr2` of 5s and use `np.vstack` to stack it vertically (i.e. row-wise) to `arr1`

In [None]:
# write your solution here:


Let's try horizontal stacking:

In [None]:
to_hstack = np.array([1, 2])
hstacked = np.hstack((stacked, to_hstack))

It did not work because the second array is a 1-D (row) array while `stacked` is 2-D, so their dimensions don't match.


In [None]:
print(f'First array shape is: {stacked.shape}. Second array shape is {to_hstack.shape}')

In [None]:
to_hstack = to_hstack[:, np.newaxis]
print(to_hstack)
hstacked = np.hstack((stacked, to_hstack))
hstacked

Finally, it is possible to concatenate arrays.

In [None]:
concatenated = np.concatenate((arr1, arr2))
concatenated

### Boolean Masking

In [None]:
np.random.seed(77)
arr = np.random.randint(0, 100, 20).reshape(4, 5)
arr

In [None]:
arr > 50

In [None]:
arr[arr > 50]

**Exercise:** replace all values smaller than 20 with 1000

In [None]:
# write your solution here:


**Exercise:** replace all values larger than 900 or smaller than 50 with -1

In [None]:
# write your solution here:


You can also use a wholly different array, provided that it has the right shape, to mask against a target array.

In [None]:
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 
                  'Joe', 'Bob', 'Will', 'Joe', 'Will', 
                  'Will', 'Bob', 'Will', 'Joe', 'Joe',
                  'Joe', 'Bob', 'Will', 'Joe', 'Will']).reshape(arr.shape)
print(names.dtype)
print(names.shape)
names == 'Bob'

In [None]:
arr [names == 'Bob'] = -9999
arr

## Vectorized methods/functions

In [None]:
mean_value = first_frame.mean()
mean_value

In [None]:
# axis=1 averages across the columns, so the result holds one mean per row
# of first_frame (its length equals the number of rows).
mean_values_by_column = first_frame.mean(axis=1)
mean_values_by_column

In [None]:
# axis=0 averages down the rows, so the result holds one mean per column
# of first_frame (its length equals the number of columns).
mean_values_by_row = first_frame.mean(axis=0)
mean_values_by_row

In [None]:
# Explicit Python-level loop: np.nditer visits every element of first_frame
# one at a time and the running total is accumulated by hand.
frame_sum = 0
for el in np.nditer(first_frame):
    frame_sum += el
frame_sum

In [None]:
frame_sum = first_frame.sum()
frame_sum

In [None]:
frame_sum = first_frame.sum(axis=0)
frame_sum

Let's time it

In [None]:
import math
print("Time taken for vectorised operation")
%time _ = np.log(np.arange(1, 20000))
print("Time taken for non-vectorised operation")
%time _ = [math.log(item) for item in range(1, 20000)]

Let's see how a user-defined function can be vectorized with NumPy using `np.vectorize()`:

In [None]:
def always_positive_diff(a, b):
    """Return the difference between ``a`` and ``b`` with the larger first.

    The result is ``a - b`` when ``a > b`` and ``b - a`` in every other
    case (including when the two values are equal, which gives zero).
    """
    # Order the operands once, then subtract the smaller from the larger.
    larger, smaller = (a, b) if a > b else (b, a)
    return larger - smaller

In [None]:
v_positive_diff = np.vectorize(always_positive_diff)

In [None]:
v_positive_diff([2, 4, 6, 8, 10], 5)

## Element-wise operations on arrays

In [None]:
np.add(first_frame, second_frame)

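The one above is a square matrix ($5\times5$). Note that it is not a symmetric square matrix: a square matrix is called symmetric only when it is equal to its own transpose.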

In [None]:
np.subtract(second_frame, first_frame)

In [None]:
np.multiply(second_frame, first_frame)

In [None]:
np.matmul(second_frame, first_frame)

In [None]:
np.exp(first_frame)

In [None]:
from scipy.linalg import expm
expm(first_frame)


## Broadcasting

The term broadcasting describes how numpy treats arrays with different shapes during arithmetic operations. Subject to certain constraints, the smaller array is “broadcast” across the larger array so that they have compatible shapes. Broadcasting provides a means of vectorizing array operations so that looping occurs in C instead of Python. It does this without making needless copies of data and usually leads to efficient algorithm implementations. There are some cases where broadcasting is a bad idea because it leads to inefficient use of memory that slows computation.

When operating on two arrays, NumPy compares their shapes element-wise. It starts with the trailing (i.e. rightmost) dimensions and works its way left. Two dimensions are compatible when

* they are equal, or

* one of them is 1

If these conditions are not met, a `ValueError: operands could not be broadcast together` exception is raised, indicating that the arrays have incompatible shapes. The size of the resulting array is the size that is not 1 along each axis of the inputs.

In [None]:
matrix = np.ones((3,5))


In [None]:
matrix

In [None]:
2 * matrix

In [None]:
np.arange(5) * matrix

In [None]:
np.arange(3) * matrix

Remember: for broadcasting to work, the trailing dimensions must be the same or one of them must be 1. Otherwise, broadcasting fails.

**Exercise:** convert the row array above `np.arange(3)` to a column array and verify that it can be multiplied against `matrix`

In [None]:
row_vec = np.arange(3)
# write your solution here:


### Read a text file (e.g. CSV) using `np.genfromtxt()`

In [None]:
# Load the CSV file into a structured array with one named field per column.
dataset = np.genfromtxt(
    "../datasets/weight-height.csv",
    delimiter=',',  # fields are comma-separated
    names=True,  # take the field names from the header row
    dtype=('U8', 'f8', 'f8')  # per-column types: 8-char Unicode string, then two 64-bit floats
)

In [None]:
dataset.dtype

In [None]:
dataset

In [None]:
dataset.dtype.names

In [None]:
dataset.dtype.fields

In [None]:
dataset.dtype.fields["Gender"]

**Exercise:** extract gender, height and weight into three separate arrays. Then compute mean and std for height and weight, and count occurrences in gender.

In [None]:
# write your solution here:
