# Numpy

In this lab, we will see how to create arrays with specific data types, manipulate arrays, select elements from arrays, and load datasets into arrays. These operations are useful for manipulating data and for understanding the functionality of other common Python data packages.

In [2]:
# import a library
import numpy as np

# Array Creation

In [None]:
# Arrays are displayed as a list or a list of lists, and can be created from lists as well. When creating an
# array, we pass a list as the argument to np.array()
a = np.array([1, 2, 3])
a

In [None]:
# We can print the number of dimensions of an array using the ndim attribute
a.ndim

In [None]:
# If we pass in a list of lists in numpy array, 
# we create a multi-dimensional array, for instance, 
# a matrix
b = np.array([[1,2,3],[4,5,6]])
b

In [None]:
# We can print out the length of each dimension 
# by calling the shape attribute, which returns a tuple
b.shape

In [None]:
# We can also check the type of items in the array
a.dtype

In [None]:
# Besides integers, floats are also accepted in numpy arrays. When integers and floats are mixed,
# the integers are converted to floats so that every element shares one dtype
c = np.array([2.2, 5, 1.1])
c.dtype.name

In [None]:
# Look at the data in our array
c

In [None]:
# Sometimes we know the shape of an array that we want to create, but not what we want to be in it. numpy
# offers several functions to create arrays with initial placeholders, such as zeros or ones.
# Let's create two arrays, both the same shape but with different filler values
d = np.zeros((2,3))
d

In [None]:
e = np.ones((2,3))
e

In [None]:
# We can also generate an array with random numbers
np.random.rand(2,3)

In [None]:
# We can also create a sequence of numbers in an array with the arange() function.
# The first argument is the starting bound (inclusive), the second argument is the ending bound (exclusive),
# and the third argument is the difference between consecutive numbers

# Let's create an array of every even number from ten (inclusive) to fifty (exclusive)
f = np.arange(10, 50, 2)
f

In [None]:
# A sequence can also be arranged into a two-dimensional array with reshape().
# Let's create an array with 15 elements, ranging from 1 to 15,
# with a dimension of 3x5 (3 rows, 5 columns)
b = np.arange(1,16,1).reshape(3,5)
b

In [None]:
# if we want to generate a sequence of floats, we can use the linspace() function. 
# In this function the third argument isn't the difference between two numbers, 
# but the total number of items you want to generate
np.linspace( 0, 2, 15 ) # 15 numbers from 0 (inclusive) to 2 (inclusive)

# Array Operations

In [None]:
# Arithmetic operators on arrays apply elementwise: each element of one array is
# combined with the element at the same position in the other array.

# Let's create a couple of arrays
a = np.array([1,2,3])
b = np.array([2,4,6])

# Now let's look at a times b
c = a*b
c

In [None]:
# Look at b minus a
d = b-a
d

In [None]:
# b divided by a (the "/" operator is true division, so the result is an array of floats)
e = b/a
e

### Quiz: Comparison of NYC temperature(°F) with Nairobi temperature(°C) in July

In [23]:
# import a library
import numpy as np

# With numpy we can easily convert a number of fahrenheit values to celsius

# Let's create an array of typical NYC fahrenheit values in July
farenheit = np.______(__________)

# Typical Nairobi celsius values in July are around 10°C-25°C
# We can check which city is warmer in July!
# Formula for conversion is: °C = (°F − 32) × 5/9

In [None]:
# typical NYC celsius values are:
celcius = ___________________
celcius

In [None]:
# Besides elementwise manipulation, it is important to know that numpy supports matrix manipulation. Let's
# look at matrix product. if we want to do elementwise product, we use the "*" sign
A = np.array([[1,1],[0,1]])
B = np.array([[2,0],[3,4]])
A*B

In [None]:
# if we want to do matrix product, we use the "@" sign or use the dot function

#matrix product is computed as follows:
# A = array([[a, b],     B = array([[e, f],
#          [c, d]])               [g, h]])
# 
# A@B = array([[(a*e+b*g), (a*f+b*h)],
#            [(c*e+d*g), (c*f+d*h)]])
A@B

In [None]:
A.shape

In [None]:
# The matrix product can only be computed when the second dimension of A matches the first dimension of B
B.shape

In [None]:
# You don't have to worry about complex matrix operations, 
# but it's important to know that numpy is capable of doing both 
# element-wise operations (the asterisk) as well as 
# matrix-level operations (the @ sign).

In [None]:
# Numpy arrays have many interesting aggregation functions 
# on them, such as  sum(), max(), min(), and mean()
array = np.array([[5.10, 4.20],
                  [3.00, 4.40]])
array.sum()

In [None]:
array.max()

In [None]:
array.min()

In [None]:
array.mean()

# Indexing, Slicing and Iterating

Indexing, slicing and iterating are extremely important for data manipulation and analysis because these techniques allow us to select data based on conditions, and copy or update data.

## Indexing

In [None]:
# First we are going to look at integer indexing.
# A one-dimensional array works in a similar way to a list.
a = np.array([1,3,5,7])
a[2]

In [None]:
# For multidimensional array, we need to use integer array indexing.
a = np.array([[1,2], [3, 4], [5, 6]])
a

In [None]:
# if we want to select one certain element, we can do so by entering the index, which consists of two
# integers: the first being the row, and the second the column
a[1,1] # remember in python we start at 0!

In [None]:
# A slice can be combined with an integer index: rows 0 and 1 (row 2 is excluded), column 1
a[0:2,1]

In [None]:
# Row 1, columns 0 and 1 (the end of the slice, 2, is excluded)
a[1,0:2]

In [None]:
# if we want to get multiple elements 
# we can enter the indices directly into an array function
np.array([a[0, 0], a[1, 1], a[2, 1]])

In [None]:
# we can also do that by using another form of array indexing,
# which essentially "zips" the first list (row indices) and the
# second list (column indices) up
print(a[[0, 1, 2], [0, 1, 1]])

## Boolean Indexing

In [None]:
# Boolean indexing allows us to select arbitrary elements based on conditions. 
a = np.array([[1,2], [3, 4], [5, 6]])
a >3

In [None]:
# We can then place this array of booleans like a mask over 
# the original array to return a one-dimensional 
# array relating to the true values.
a[a>3]

In [None]:
# As we will see, this functionality is essential in pandas.

## Slicing

In [None]:
# Slicing is a way to create a sub-array based on the original array. 
a = np.array([0,1,2,3,4,5])
a[:3]

In [None]:
# By putting 2:4 in the bracket, we get elements from 
# index 2 to index 3 (excluding index 4)
a[2:4]

In [None]:
# For multi-dimensional arrays, it works similarly.
a = np.array([[1,2,3,4], [5,6,7,8], [9,10,11,12]])
a

In [None]:
# First, if we put one argument in the brackets, we get all the elements
# from the first (0th) and second (1st) rows
a[:2]

In [None]:
# If we add another argument to the array, we get the first 
# two rows but then the second and third column values only
a[:2, 1:3]

In [None]:
# So, in multidimensional arrays, the first argument is 
# for selecting rows, and the second argument is for 
# selecting columns

In [None]:
# It is important to realize that a slice of an array is a view into the same data, not a copy. This behaves
# like passing by reference: modifying the sub array will consequently modify the original array

sub_array = a[:2, 1:3]
print("sub array index [0,0] value before change:", sub_array[0,0])
sub_array[0,0] = 50
print("sub array index [0,0] value after change:", sub_array[0,0])
# sub_array starts at column 1 of a, so sub_array[0,0] and a[0,1] are the same element
print("original array index [0,1] value after change:", a[0,1])