# Friday, Week 1: Module
# Array Computing with Numpy

__Learning Objective:__ introduce the numpy module for array computing, and describe some of its features as well as useful documentation.

In [1]:
import numpy as np

# Array Creation Techniques:

https://numpy.org/doc/stable/reference/routines.array-creation.html

__Example:__ Create a 1D array of numbers from 0 to 9:

In [2]:
arr = np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

__Example:__ Create a 1D array of zeros with length 15:

In [3]:
arr = np.zeros(15)
arr

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

__Example:__ Create two 1D arrays of ones with length 5. The first will have str entries; the second will have ints.

In [4]:
arr_str = np.ones(5, dtype="str")
arr_str

array(['1', '1', '1', '1', '1'], dtype='<U1')

In [5]:
arr_int = np.ones(5, dtype="int")
arr_int

array([1, 1, 1, 1, 1])

__Example:__ Create a 1D array of zeros with the same shape as a previously declared array.

In [6]:
arr2 = np.zeros_like(arr_int)
arr2

array([0, 0, 0, 0, 0])

Note that if we call zeros_like on the array of strings, we get an array of empty strings!

In [7]:
np.zeros_like(arr_str)

array(['', '', '', '', ''], dtype='<U1')

But if we call ones_like on the array of strings, we get strings of 1s. Confusing, I know!

In [8]:
np.ones_like(arr_str)

array(['1', '1', '1', '1', '1'], dtype='<U1')

__Example:__ Create a numpy array using existing data (like a Python list)

In [9]:
python_list = [1.0, 2.0, 3.0, 4.0]
type(python_list)

list

In [10]:
numpy_arr = np.array(python_list)
type(numpy_arr)

numpy.ndarray

__Example:__ Create a numpy array from a nested list

In [11]:
nested_list = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]

numpy_mat = np.array(nested_list)
numpy_mat

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

__Question:__ What do these do?

In [12]:
np.arange(0, 10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [None]:
#TODO: Play with the above cell and then describe np.arange()'s inputs and outputs here. 
# np.arange(start, stop) returns a 1D numpy array of evenly spaced values (step 1 by default) from start up to, but not including, stop. The result is an ndarray, not a Python list.

In [13]:
np.linspace(0, 10, 6)

array([ 0.,  2.,  4.,  6.,  8., 10.])

In [14]:
np.linspace(0, 10, 10)

array([ 0.        ,  1.11111111,  2.22222222,  3.33333333,  4.44444444,
        5.55555556,  6.66666667,  7.77777778,  8.88888889, 10.        ])

In [None]:

# np.linspace(start, stop, num) returns num evenly spaced values from start to stop, with both endpoints included.

The main difference between a list and an array is that arrays are designed to handle so-called vectorized operations. If you apply an operation to a list, it acts on the entire list object (e.g., list += ["hello"] will append "hello" to the list). If you apply an operation to an array, it acts on every individual element.

In [15]:
python_list = [1,2,3,4,5]
# += with a list on the right-hand side extends python_list with that list's items
python_list += [6]
python_list

[1, 2, 3, 4, 5, 6]

In [16]:
numpy_arr = np.array([1,2,3,4,5])
# += with a number adds that number to every element of the array
numpy_arr += 6
numpy_arr

array([ 7,  8,  9, 10, 11])

Let's see a concrete example of vectorized operations using our friends Celsius and Fahrenheit: 

In [17]:
cvalues = [20.1, 20.8, 21.9, 22.5, 22.7, 22.3, 21.8, 21.2, 20.9, 20.1]

In [18]:
C = np.array(cvalues)
print(C)

[20.1 20.8 21.9 22.5 22.7 22.3 21.8 21.2 20.9 20.1]


Now we'll multiply the entire array by 9/5 and add 32! 

In [19]:
print(C * 9 / 5 + 32)

[68.18 69.44 71.42 72.5  72.86 72.14 71.24 70.16 69.62 68.18]


But if we try to do the same thing with a list below, we get an error.

In [20]:
# Expected failure: cvalues * 9 repeats the list, and a list cannot be divided by an int.
# Catch the TypeError and print it so the notebook still runs from top to bottom.
try:
    print(cvalues * 9/5 + 32)
except TypeError as err:
    print(f"{type(err).__name__}: {err}")

TypeError: unsupported operand type(s) for /: 'list' and 'int'

For lists, we could use a list comprehension instead of vectorized math... But it's not as fast. 

In [21]:
fvalues = [ x*9/5 + 32 for x in cvalues] 
print(fvalues)

[68.18, 69.44, 71.42, 72.5, 72.86, 72.14, 71.24000000000001, 70.16, 69.62, 68.18]


__Lists are dynamic -- their size can change. Arrays' sizes cannot change after they have been created.__

__Every array has one and only one dtype. All items in it should be of that dtype.__

In [22]:
# Create a float 2d array
arr2d_f = np.array([[1,2,3], [4,5,6], [7,8,9]], dtype='float')
arr2d_f


array([[1., 2., 3.],
       [4., 5., 6.],
       [7., 8., 9.]])

In [23]:
#can convert to and from different types. 
arr2d_f.astype('str')

array([['1.0', '2.0', '3.0'],
       ['4.0', '5.0', '6.0'],
       ['7.0', '8.0', '9.0']], dtype='<U32')

In [24]:
# If you *really* want to mix types, you can 
# create an object array to hold numbers as well as strings
arr1d_obj = np.array([1, 'a'], dtype='object')
arr1d_obj

array([1, 'a'], dtype=object)

In [25]:
# Convert an array back to a list. Each element comes back as a plain Python object: 1 is an int and 'a' is a str.
arr1d_obj.tolist()

[1, 'a']

__An equivalent numpy array uses significantly less memory than a nested Python list.__ 

# Array Shape and Size

In [30]:
# Create a 2d array with 3 rows and 4 columns
list2 = [[1, 2, 3, 1],[3, 4, 5, 1], [5, 6, 2, 8]]
arr2 = np.array(list2, dtype='float')
arr2

array([[1., 2., 3., 1.],
       [3., 4., 5., 1.],
       [5., 6., 2., 8.]])

In [29]:
# shape is an attribute (not a method): a tuple holding the number of rows and the number of columns, in that order
print('Shape: ', arr2.shape)

# dtype is an attribute: the data type shared by all of the array's entries
print('Datatype: ', arr2.dtype)

# size is an attribute: the total number of elements in the array
print('Size: ', arr2.size)

# ndim is an attribute: the number of indices we need to uniquely pick out a single element of the array
print('Num Dimensions: ', arr2.ndim)

Shape:  (3, 4)
Datatype:  float64
Size:  12
Num Dimensions:  2


# Array Indexing

Indexing is similar to lists, but there are a few differences.

In [26]:
arr2

array([[1., 2., 3., 1.],
       [3., 4., 5., 1.],
       [5., 6., 2., 8.]])

In [31]:
list2

[[1, 2, 3, 1], [3, 4, 5, 1], [5, 6, 2, 8]]

In [28]:
arr2[0]

array([1., 2., 3., 1.])

In [32]:
list2[0]

[1, 2, 3, 1]

In [33]:
list2[0][0]

1

In [34]:
arr2[0,0]

1.0

In [35]:
arr2[1,1]

4.0

In [36]:
# Expected failure: a nested list cannot be indexed with a (row, column) pair the way an array can.
# Catch the TypeError and print it so the notebook still runs from top to bottom.
try:
    list2[1,1]
except TypeError as err:
    print(f"{type(err).__name__}: {err}")

TypeError: list indices must be integers or slices, not tuple

In [37]:
list2[1][1]

4

In [38]:
arr2

array([[1., 2., 3., 1.],
       [3., 4., 5., 1.],
       [5., 6., 2., 8.]])

In [39]:
arr2[1:,2:]

array([[5., 1.],
       [2., 8.]])

Arrays support Boolean indexing:

In [40]:
arr2

array([[1., 2., 3., 1.],
       [3., 4., 5., 1.],
       [5., 6., 2., 8.]])

In [41]:
arr2_greater_than_4 = arr2 > 4
arr2_greater_than_4

array([[False, False, False, False],
       [False, False,  True, False],
       [ True,  True, False,  True]])

In [42]:
arr2[arr2_greater_than_4]

array([5., 5., 6., 8.])

In [43]:
arr2[0]

array([1., 2., 3., 1.])

In [44]:
arr2

array([[1., 2., 3., 1.],
       [3., 4., 5., 1.],
       [5., 6., 2., 8.]])

In [45]:
# Flip the order of the rows; the explicit ':' keeps every column in its original position
arr2[::-1, :]

array([[5., 6., 2., 8.],
       [3., 4., 5., 1.],
       [1., 2., 3., 1.]])

In [46]:
# Reverse the column positions
arr2[:, ::-1]

array([[1., 3., 2., 1.],
       [1., 5., 4., 3.],
       [8., 2., 6., 5.]])

In [47]:
# Reverse both the row and the column positions
arr2[::-1, ::-1]

array([[8., 2., 6., 5.],
       [1., 5., 4., 3.],
       [1., 3., 2., 1.]])

In [48]:
print(arr2)

[[1. 2. 3. 1.]
 [3. 4. 5. 1.]
 [5. 6. 2. 8.]]


In [49]:
arr2.transpose()

array([[1., 3., 5.],
       [2., 4., 6.],
       [3., 5., 2.],
       [1., 1., 8.]])

__Task:__ Define a Python nested list that contains strings describing your top two favorite hobbies for each of the last three years. E.g. if three years ago my favorite hobbies were reading and running, two years ago they were cooking and baking, and last year they were running and writing, my list would be [['reading', 'running'], ['cooking', 'baking'], ['running', 'writing']]. 

1. Convert your list to a numpy array of strings.
2. Print out the hobbies you enjoyed two years ago using array indexing. In my case, the desired output is ['cooking', 'baking'].
3. In one line of code, print out the first hobby you listed as having enjoyed two years ago and three years ago. In my case, the desired output would be ['reading', 'cooking']. 
4. Print out the last hobby you have listed for all three years. In my case, this would be ['running', 'baking', 'writing'].
5. Print out the number of hobbies in the array. In my case, there are six.
6. Without coding anything, write down what the shape of the array would be if you had listed two of your favorite hobbies for the last four years. 
7. Print out an array of Booleans where each entry is True if and only if the hobby in your original array has a string length longer than 6 characters (otherwise, the entry should be False). In my case, the desired output would be: [[True, True], [True, False], [True, True]]. 
8. Now print out only those hobbies that have string length longer than 6 characters. In my case, the desired output would be: ['reading', 'running', 'cooking', 'running', 'writing'].

In [59]:
#TODO: Declare your list here
hobby_lst = ['design','tennis','music','building','growth','investing']

In [60]:
#TODO: #1 Converting lists to arrays
hobby_array = np.array(hobby_lst)

In [61]:
#TODO: #2 Array indexing
fav_hobbies = hobby_array[0], hobby_array[3]
print(fav_hobbies)

('design', 'building')


In [63]:
#TODO: #3 Print out fav hobbies 
fav_hobbies = hobby_array[0], hobby_array[3]
print(fav_hobbies)

('design', 'building')


In [64]:
#TODO: #4 First hobbies in one line 
first_hobbies = hobby_array[0], hobby_array[1]
print(first_hobbies)

('design', 'tennis')


In [65]:
#TODO: #5 Print out the length of the array
print(len(hobby_array))

6


In [None]:
#TODO: #6 Shape of the array with fav hobbies from the past two years
# It would look like a table. 2 rows and 4 columns.

In [75]:
#TODO: #7  & 8 Print out an array of Booleans where each entry is True if and only if the hobby in your original array has a string length longer than 6 characters (otherwise, the entry should be False). 
# In my case, the desired output would be: [[True, True], [True, False], [True, True]]. 

for hobby in hobby_array:
    if len(hobby) > 6:
        print(f'{hobby} is >= 6 characters')
    # else:
    #     hobby_len = len(hobby)
    #     print(f'{hobby} is {hobby_len} chars')


building is >= 6 characters
investing is >= 6 characters


# Simple Stats: Mean, Min, Max

In [76]:
# mean, max and min
print(arr2)
print("Mean value is: ", arr2.mean())
print("Max value is: ", arr2.max())
print("Min value is: ", arr2.min())

[[1. 2. 3. 1.]
 [3. 4. 5. 1.]
 [5. 6. 2. 8.]]
Mean value is:  3.4166666666666665
Max value is:  8.0
Min value is:  1.0


In [77]:
# Minimum along each axis: axis=0 collapses the rows (one value per column),
# axis=1 collapses the columns (one value per row)
column_minimums = arr2.min(axis=0)
row_minimums = arr2.min(axis=1)
print("Column wise minimum: ", column_minimums)
print("Row wise minimum: ", row_minimums)

Column wise minimum:  [1. 2. 2. 1.]
Row wise minimum:  [1. 1. 2.]


In [78]:
# Cumulative Sum
np.cumsum(arr2)

array([ 1.,  3.,  6.,  7., 10., 14., 19., 20., 25., 31., 33., 41.])

__Task:__ Play with the above cell. Once you're ready, describe np.cumsum()'s inputs and return values. 

In [None]:
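# np.cumsum(a) returns the running total of a's elements: entry i of the result is the sum of the first i+1 values.
# With no axis argument, a 2D input is flattened row by row first, so the result is a 1D array with one entry per element.
# Passing axis=0 or axis=1 accumulates down the columns or along the rows instead and keeps the input's shape.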

# Dealing with NaN and Infinities

In [None]:
# Insert a nan and an inf
arr2[1,1] = np.nan  # not a number
arr2[1,2] = np.inf  # infinite
arr2

In [None]:
# Flag every entry that is not a finite number (nan or +/- inf), then overwrite those entries with -1
missing_bool = ~np.isfinite(arr2)
arr2[missing_bool] = -1
arr2

# Creating arrays from existing arrays: be careful!

In [None]:
print(arr2)

In [None]:
arr2a = arr2[:2,:2]
arr2a

In [None]:
arr2a[0, 0] = 100
arr2a

In [None]:
arr2

In [None]:
# Slice the top-left 2x2 corner of arr2 into arr2a. This does not copy the data:
# arr2a is a view that shares memory with arr2.
arr2a = arr2[:2,:2]  

print("\n",arr2)

arr2a[:1, :1] = 200  # 200 will reflect in arr2

print("\n",arr2)


In [None]:
# Copy portion of arr2 to arr2b
arr2b = arr2[:2, :2].copy()
arr2b[:1, :1] = 300  # 300 will not reflect in arr2

print(arr2, "\n")
print(arr2b)

# Reshaping and Flattening Arrays

In [None]:
# Reshape a 3x4 array to 4x3 array
arr2

In [None]:
arr2.reshape(4, 3)

In [None]:
arr2.transpose()

In [None]:
arr2.reshape(6, 2)

In [None]:
arr2.reshape(12,1)

In [None]:
# Expected failure: arr2 has 3 x 4 = 12 elements, which cannot fill a 5 x 4 = 20-element shape.
# Catch the ValueError and print it so the notebook still runs from top to bottom.
try:
    arr2.reshape(5,4)
except ValueError as err:
    print(f"{type(err).__name__}: {err}")

Sometimes it is necessary to collapse a multi-dimensional array into a 1D (or flat) array. 

Two additional methods to flatten an array:
1. ravel() : ravel returns a view of the parent array whenever possible, so changing the flat array changes the parent too. But, more memory efficient! 
2. flatten() : flatten creates a copy. Uses more memory but changes to new array will not change parent array.

In [None]:
arr2 = np.array([[12, 3, 4], [1, -5, 6], [-3, 2, 3]])

In [None]:
arr2_flattened = arr2.flatten()
arr2_flattened

In [None]:
arr2_flattened[0] = 1000
arr2

In [None]:
arr2_raveled = arr2.ravel()
arr2_raveled

In [None]:
arr2_raveled[0] = 4000
arr2

# Create repeating patterns

In [None]:
a = [1,2,3] 

# Repeat whole of 'a' two times
print('Tile:   ', np.tile(a, 2))

# Repeat each element of 'a' two times
print('Repeat: ', np.repeat(a, 2))

# Generating random numbers

In [None]:
# Random numbers between [0,1) of shape 2,2
print(np.random.rand(2,2))

In [None]:
# Normal distribution with mean=0 and variance=1 of shape 2,2
print(np.random.randn(2,2))

In [None]:
# Random integers between [0, 10) of shape 2,2
print(np.random.randint(0, 10, size=[2,2]))

In [None]:
# One random number between [0,1)
print(np.random.random())

In [None]:
# Random numbers between [0,1) of shape 2,2
print(np.random.random(size=[2,2]))

In [None]:
# Pick 10 items from a given list, with equal probability
print(np.random.choice(['a', 'e', 'i', 'o', 'u'], size=10))  

In [None]:
# Pick 10 items from a given list with a predefined probability 'p'
print(np.random.choice(['a', 'e', 'i', 'o', 'u'], size=10, p=[0.3, 0.1, 0.1, 0.4, 0.1]))  # picks more o's

In [None]:
# If you want random numbers, but you want your code to produce the same random numbers every run,
# you can give numpy's random number generator a seed with np.random.seed(). If you seed it with the
# same number every time, it will return the same sequence of random numbers after each seeding.

In [None]:
np.random.seed(42)

In [None]:
print(np.random.random())
print(np.random.random())
print(np.random.random())

In [None]:
np.random.seed(42)

In [None]:
print(np.random.random())
print(np.random.random())
print(np.random.random())

# Find unique items in array and how often they occur

In [None]:
# Create random integers of size 10 between [0,10)
np.random.seed(100)
arr_rand = np.random.randint(0, 10, size=10)
print(arr_rand)

In [None]:
# Get the unique items and their counts
uniqs, counts = np.unique(arr_rand, return_counts=True)
print("Unique items : ", uniqs)
print("Counts       : ", counts)
