In [3]:
# References:
# Quickstart tutorial: https://docs.scipy.org/doc/numpy/user/quickstart.html
# Docs - Numpy Reference: https://docs.scipy.org/doc/numpy/reference/index.html
# Stanford CS Class (Justin Johnson): http://cs231n.github.io/python-numpy-tutorial/
# Datacamp cheat sheet: https://s3.amazonaws.com/assets.datacamp.com/blog_assets/Numpy_Python_Cheat_Sheet.pdf

In [4]:
# Imports
import random as rand
import numpy as np

In [5]:
# Life without arrays: every attribute lives in its own plain Python list
rand.seed(22)  # fixed seed so the generated numbers are the same on every run
names = ['Krista', 'Jose', 'Arun', 'Marek', 'Joe']
heights = [rand.randint(150, 200) for _ in names]  # one random height (cm) per name
ages = [rand.randint(18, 80) for _ in names]       # one random age per name
print(names, heights, ages, sep='\n')

['Krista', 'Jose', 'Arun', 'Marek', 'Joe']
[158, 165, 151, 189, 178]
[29, 62, 25, 65, 59]


In [6]:
# Multiply each age by 2, add 2 to each age -- plain lists do neither elementwise
print(ages*2)  # list * int repeats the whole list instead of doubling each element
# list + int is not defined at all. Catch the TypeError and display it so the
# demonstration is still visible without halting a "Run All" of the notebook.
try:
    print(ages+2)
except TypeError as err:
    print("TypeError:", err)

[29, 62, 25, 65, 59, 29, 62, 25, 65, 59]


TypeError: can only concatenate list (not "int") to list

In [8]:
# Multiply age by 2
def elementwise_multiplication(my_list, var):
    """Return a new list with every element of ``my_list`` multiplied by ``var``.

    Args:
        my_list: Iterable of values that support ``*`` with ``var``.
        var: The multiplier applied to each element.

    Returns:
        A new list of the products, in the same order as the input.
        ``my_list`` itself is not modified.
    """
    return [element * var for element in my_list]

# Pure-Python route: loop over the list through the helper function
age_by_two = elementwise_multiplication(ages, 2)
print(age_by_two)

# With numpy: the array is multiplied elementwise, no explicit loop needed
print(2 * np.array(ages))

[58, 124, 50, 130, 118]
[ 58 124  50 130 118]


In [9]:
# Consider how you would perform the following using python lists:
    # Select only ages where height is above 160 cm
    # Get Arun's information (age and height) only
    # Sort by height

In [10]:
# NumPy also facilitates faster operations when large datasets are involved because NumPy uses vectorized operations
# No loops necessary

basic_python = [i for i in range(10000)]
numpy_fun = np.arange(10000)

% timeit elementwise_multiplication(basic_python,500)
% timeit numpy_fun*500

864 µs ± 36.1 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
8.11 µs ± 102 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [41]:
# Create an array from a Python list (a rank 1, i.e. one-dimensional, array)
a = np.array([1, 2, 3])
print(a, type(a), sep='\n')

[1 2 3]
<class 'numpy.ndarray'>


In [42]:
# List-like functionality: elements are read and assigned by position
print(a[0])  # read the first element
# Item assignment changes the existing array in place
a[0] = 100
print(a)

1
[100   2   3]


In [46]:
# View the shape (a tuple with one entry per dimension), then the length
# of the first dimension as reported by len()
print(a.shape, len(a), sep='\n')

(3,)
3


In [52]:
# Multidimensional array: a list of equal-length lists becomes a 2-D array
m = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
print(m, m.shape, sep='\n')  # shape is (rows, columns)

[[1 2 3]
 [4 5 6]]
(2, 3)


In [53]:
# Creating different types of multidimensional arrays
a = np.zeros(shape=(2, 2))  # 2x2 array in which every entry is 0.0
print(a)

[[0. 0.]
 [0. 0.]]


In [54]:
b = np.ones(shape=(2, 2))             # 2x2 array in which every entry is 1.0
c = np.full((2, 2), fill_value=7)     # 2x2 array holding the constant 7 everywhere
print(b, c, sep='\n')

[[1. 1.]
 [1. 1.]]
[[7 7]
 [7 7]]


In [55]:
d = np.eye(N=2, M=2)                  # 2x2 identity matrix: ones on the diagonal, zeros elsewhere
e = np.random.random(size=(2, 2))     # 2x2 array of random floats (unseeded, so values differ per run)
print(d, e, sep='\n')

[[1. 0.]
 [0. 1.]]
[[0.0908199  0.2662186 ]
 [0.66192065 0.38726228]]


In [14]:
# Creating 1 dimensional arrays
f = np.arange(10)        # 1D array of the integers 0 to 9 (the end value 10 is excluded)
g = np.arange(2, 11, 2)  # (start, end, step): end is exclusive, so 11 lets 10 be included
h = np.repeat(22, 5)     # 1D array holding the value 22 five times
print(f, g, h, sep='\n')

[0 1 2 3 4 5 6 7 8 9]
[ 2  4  6  8 10]
[22 22 22 22 22]


In [61]:
# Viewing an array and its basic attributes
a = np.array([
    [1, 2, 3, 4],
    [5, 6, 7, 8],
    [9, 10, 11, 12],
])
print(a, end='\n\n')
print(a.shape, end='\n\n')  # (rows, columns)
print(a.ndim, end='\n\n')   # number of dimensions
print(a.size)               # total number of elements = product of the shape

[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]

(3, 4)

2

12


In [58]:
# A three-dimensional array: 2 blocks, each made of 4 rows with 1 element
three_d = np.array([
    [[1], [2], [3], [4]],
    [[5], [6], [7], [8]],
])
print(three_d.shape, three_d.ndim, sep='\n')

(2, 4, 1)
3


In [62]:
# Slicing
# Get first two columns of first row; slicing both axes keeps the result 2-D
print(a[0:1, 0:2], end='\n\n')

[[1 2]]



In [63]:
# Get first two columns of first TWO rows (rows 0-1, columns 0-1)
print(a[0:2, 0:2], end='\n\n')

[[1 2]
 [5 6]]



In [64]:
# Get last column: ':' keeps every row, -1 picks the final column.
# The integer index drops that axis, so the result is 1-D.
print(a[:,-1])

[ 4  8 12]


In [65]:
# Integer indexing: a single integer on a 2-D array selects a whole row
print(a[0])

[1 2 3 4]


In [66]:
# Boolean indexing
bool_id = a > 6  # elementwise comparison -> boolean array with the same shape as a
print(bool_id, end='\n\n')

# Indexing with the mask returns the matching elements as a flat 1-D array
print(a[bool_id], end='\n\n')
# The mask can also be written inline
print(a[a > 6])

[[False False False False]
 [False False  True  True]
 [ True  True  True  True]]

[ 7  8  9 10 11 12]

[ 7  8  9 10 11 12]


In [20]:
# Datatypes
print(a.dtype)  # dtype of the existing array `a`

print(np.array([1., 2.]).dtype)  # float input

print(np.array([1., 2.], dtype=np.int32).dtype)  # Force a certain type

print(np.array(list('abc')).dtype)  # single-character strings

# Unlike lists, the elements of a NumPy array must all be the same type,
# so this mixed input is converted to one common type (strings here)
print(np.array([1., 'a', 2]))

int32
float64
int32
<U1
['1.0' 'a' '2']


In [21]:
# Exercise:
# Create an array using the heights and ages lists from the beginning
# Tell me the shape and dimensions
# Select Arun's information only
# Select only ages where height is above 160 cm
# Print the current datatype
# Convert the datatype to float (hint: astype())

my_array = np.array([heights, ages])  # row 0 holds the heights, row 1 the ages
print(my_array, my_array.shape, my_array.ndim, sep='\n')

# Each person is a column, located through their position in `names`
arun_index = names.index('Arun')
print(my_array[:, arun_index], end='\n\n')

# Boolean mask built from the height row, applied to the age row
print(my_array[1, my_array[0] > 160], end='\n\n')

print(my_array.dtype)
print(my_array.astype('float'))  # astype returns a converted array; my_array is not reassigned

[[158 165 151 189 178]
 [ 29  62  25  65  59]]
(2, 5)
2
[151  25]

[62 65 59]

int32
[[158. 165. 151. 189. 178.]
 [ 29.  62.  25.  65.  59.]]


In [22]:
# Arithmetic operators
A = np.array([[4, 6], [8, 10]], dtype=float)
B = np.array([[1, 1], [2, 2]], dtype=float)

print(A, B, sep='\n', end='\n\n')

# Elementwise subtraction: the operator and np.subtract give the same result
print(A - B, np.subtract(A, B), sep='\n', end='\n\n')

print(A + B)  # np.add()

[[ 4.  6.]
 [ 8. 10.]]
[[1. 1.]
 [2. 2.]]

[[3. 5.]
 [6. 8.]]
[[3. 5.]
 [6. 8.]]

[[ 5.  7.]
 [10. 12.]]


In [23]:
# Both operators act element by element (this is not matrix multiplication)
print(A * B, end='\n\n')  # np.multiply() - elementwise
print(A / B, end='\n\n')  # np.divide()

[[ 4.  6.]
 [16. 20.]]

[[4. 6.]
 [4. 5.]]



In [24]:
# A scalar operand is applied to every element
print(A / 2, end='\n\n')

# NumPy math functions are applied to every element as well
print(np.sqrt(A), end='\n\n')
print(np.log(A), end='\n\n')
print(np.exp(np.log(A)), end='\n\n')  # exp undoes log, giving A back
print(np.sin(A))

[[2. 3.]
 [4. 5.]]

[[2.         2.44948974]
 [2.82842712 3.16227766]]

[[1.38629436 1.79175947]
 [2.07944154 2.30258509]]

[[ 4.  6.]
 [ 8. 10.]]

[[-0.7568025  -0.2794155 ]
 [ 0.98935825 -0.54402111]]


In [25]:
# Aggregation functions
print(A)
print()

print(A.sum())
print(np.sum(A))
print(np.sum(A, axis=0))
print()

print(np.mean(A))
print()

print(np.median(A))
print()

print(np.round(np.percentile(A,.25),2))

[[ 4.  6.]
 [ 8. 10.]]

28.0
28.0
[12. 16.]

7.0

7.0

4.02


In [26]:
# Exercise:
# Tell me the mean of both heights and ages in one line of code
# Tell me the square root of Arun's age and height
# Print the log of heights

print(my_array)
print(np.mean(my_array, axis=1))  # axis=1 averages along each row: heights, then ages
arun_info = my_array[:, arun_index]
print(arun_info, np.sqrt(arun_info), sep='\n')
print(np.log(my_array[0]))  # row 0 holds the heights

[[158 165 151 189 178]
 [ 29  62  25  65  59]]
[168.2  48. ]
[151  25]
[12.28820573  5.        ]
[5.06259503 5.10594547 5.01727984 5.24174702 5.18178355]


In [27]:
# Matrix multiplication
a = np.array([1, 2])
b = np.array([2, 10])
# Dot product of two vectors: the function form and the method form agree
print(np.dot(a, b), a.dot(b), sep='\n')

22
22


In [28]:
# Show both operands, then their matrix product computed two ways
print(A, B, sep='\n', end='\n\n')
print(A.dot(B), A @ B, sep='\n')  # .dot() and the @ operator give the same matrix product

[[ 4.  6.]
 [ 8. 10.]]
[[1. 1.]
 [2. 2.]]

[[16. 16.]
 [28. 28.]]
[[16. 16.]
 [28. 28.]]


In [29]:
# 1-D vector times 2-D matrix: the result is a 1-D array with one entry per column of B
a.dot(B)

array([5., 5.])

In [30]:
# Exercise:
# Verify the above by hand

In [31]:
# Reshaping arrays
A.T # Transpose: rows become columns

array([[ 4.,  8.],
       [ 6., 10.]])

In [32]:
A.ravel() # Flatten: all elements as one 1-D array, row after row

array([ 4.,  6.,  8., 10.])

In [33]:
# Reshape A into a single column (4 rows, 1 column), i.e. the same shape as np.array([[1],[1],[1],[1]])
A.reshape(4,1)

array([[ 4.],
       [ 6.],
       [ 8.],
       [10.]])

In [34]:
# Reshape A into a single row (1 row, 4 columns); the result is still 2-D
A.reshape(1,4)

array([[ 4.,  6.,  8., 10.]])

In [35]:
# Create a three dimensional array: the same 4 elements arranged as
# shape (2, 2, 1) and then as shape (4, 1, 1)
print(A.reshape(2, 2, 1), A.reshape(4, 1, 1), sep='\n')

[[[ 4.]
  [ 6.]]

 [[ 8.]
  [10.]]]
[[[ 4.]]

 [[ 6.]]

 [[ 8.]]

 [[10.]]]


In [36]:
# Convert the array back into nested Python lists (one inner list per row)
A.tolist()

[[4.0, 6.0], [8.0, 10.0]]

In [37]:
# Sorting
a = np.array([3, 1, 5, 2, 4])
sort_i = a.argsort()  # indices that put `a` into ascending order
# Indexing with those indices yields the sorted values
print(sort_i, a[sort_i], sep='\n')

[1 3 0 4 2]
[1 2 3 4 5]


In [38]:
# Exercise
# Sort the height, age array by height
# Create a new array that has the height, age pair for each person as rows i.e., [[height1, age1], [height2...
# Select Arun's information from the new array

# Reorder the columns by the argsort of the height row so each age stays with its height
print(my_array[:, my_array[0].argsort()], end='\n\n')

# Transposing turns each person's column into a (height, age) row
paired_array = my_array.T
print(paired_array, end='\n\n')

# People are rows now, so Arun's index selects a row
print(paired_array[arun_index])

[[151 158 165 178 189]
 [ 25  29  62  59  65]]

[[158  29]
 [165  62]
 [151  25]
 [189  65]
 [178  59]]

[151  25]
