In [None]:
import numpy as np
import pandas as pd


In [None]:
#very similar to python lists
#2 main reasons why numpy is better
    #- much of numpy functionality is written in c, which is much faster than python lists
    #- easy to store computer readable data in arrays

### Introduction

In [None]:
#numpy is the backbone that stores the numerical data
#in which machine learning algorithms find patterns
#backbone of many other python scientific packages

In [None]:
#gets its speed through vectorization via broadcasting (avoiding loops)

### Numpy DataTypes and Attributes

In [None]:
import numpy as np
import pandas as pd

In [None]:
#numpy's main datatype is ndarray

In [None]:
a1 = np.array([1,2,3])
a1

In [None]:
type(a1)

In [None]:
# 2-D array: 2 rows x 3 columns
a2 = np.array([
    [1, 2, 3.3],
    [4, 5, 6.5],
])
# 3-D array: the integers 1..18 laid out as two 3x3 matrices
a3 = np.arange(1, 19).reshape(2, 3, 3)

In [None]:
a3[0][1][2]

In [None]:
a2

In [None]:
a3

![image.png](attachment:image.png)

In [None]:
a1.shape

In [None]:
a2.shape

In [None]:
a1.ndim

In [None]:
a2.ndim

In [None]:
a3.ndim

In [None]:
a1.dtype, a2.dtype, a3.dtype

In [None]:
a1.size, a2.size, a3.size #total number of values in the array

In [None]:
type(a1), type(a2), type(a3) #ndarray is a universal datatype for anything stored in numpy

In [None]:
#create a dataframe from np array
df = pd.DataFrame(a2)
df

### Creating np arrays

In [None]:
sample_array = np.array([1,2,3])
sample_array

In [None]:
#np.ones(shape, dtype=None, order='C', *, like=None)

In [None]:
# 2 rows x 3 columns filled with 1.0 (default dtype is float)
ones = np.ones(shape=(2, 3))
ones

In [None]:
#np.zeros(shape, dtype=float, order='C', *, like=None)

In [None]:
# 2 rows x 3 columns filled with 0.0 (default dtype is float)
zeros = np.zeros(shape=(2, 3))
zeros

In [None]:
#np.arange([start,] stop[, step,], dtype=None, *, like=None)
#Return evenly spaced values within a given interval.

In [None]:
range_array = np.arange(3)
range_array

In [None]:
# keyword form: start=0 (inclusive), stop=10 (exclusive), step=2
range_array = np.arange(start=0, stop=10, step=2)
range_array

In [None]:
#np.random.randint(low, high=None, size=None, dtype=int)
#Return random integers from `low` (inclusive) to `high` (exclusive).

In [None]:
# 3 rows x 5 columns of integers; low (0) is inclusive, high (10) is exclusive
random_array = np.random.randint(low=0, high=10, size=(3, 5))
random_array

In [None]:
random_array.size, random_array.shape

In [None]:
#np.random.random(size=None)
#Return random floats in the half-open interval [0.0, 1.0)

In [None]:
random_array2 = np.random.random(size = (5,3))

random_array2

In [None]:
#np.random.rand(d0, d1, ..., dn)
#Random values in a given shape.

In [None]:
random_array3 = np.random.rand(5,3)
random_array3

In [None]:
#random numbers in numpy are pseudo-random numbers.  they are generated deterministically from a random seed.

### Numpy Random Seed

In [None]:
random_array4 = np.random.randint(10, size = (5,3))
random_array4

In [None]:
#what if we wanted to share our notebook with somebody else and have them run through the cells and
#get the exact same answers we got?   we can set a random seed so all the random values are exactly the same

In [None]:
# seeding the global generator first makes the random draw reproducible
np.random.seed(99999)
random_array4 = np.random.randint(low=0, high=10, size=(5, 3))
random_array4

In [None]:
#the values above were the exact same values he got in the video

In [None]:
# same idea with a different sampler: seed, then draw floats in [0.0, 1.0)
np.random.seed(seed=7)
random_array5 = np.random.random(size=(5, 3))
random_array5
# still the same as the values in the video

In [None]:
np.random.random((5,3))
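#these values differ from random_array5, but not because the seed is limited to one cell:
#np.random.seed sets the global generator state once, and every later call (in any cell)
#continues the same sequence. re-seed right before a call to get the same values again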

### Viewing Arrays and Matrices

In [None]:
#find all unique values of a matrix
#np.unique(ar,return_index=False,return_inverse=False,return_counts=False,axis=None,)
np.unique(random_array4)
#returns a 1d array

In [None]:
a1, a2, a3

In [None]:
#element 0 of a1
a1[0]

In [None]:
#element 0 of a2
a2[0]

In [None]:
#element 0 of a3
a3[0]

In [None]:
a2[1]

In [None]:
a3

In [None]:
#can also use slicing with arrays
a3[:2, :2, :2]
#first 2 matrices, first 2 rows, first 2 columns
#slicing is inclusive at start and exclusive at stop

In [None]:
a4 = np.random.randint(10, size = (2,3,4,5))
print(a4, a4.shape, a4.ndim)

In [None]:
a4[0,0,2,4]

In [None]:
#only first 3 numbers of the inner most arrays
a4[:,:,:,:3]

### Manipulating arrays

In [None]:
a1

In [None]:
ones = np.ones(3)
ones

In [None]:
#elementwise addition: the + operator and np.add are equivalent
#(returned as a tuple so both results are displayed, not only the last line)
a1 + ones, np.add(a1, ones)

In [None]:
#elementwise subtraction: the - operator and np.subtract are equivalent
#(returned as a tuple so both results are displayed, not only the last line)
a1 - ones, np.subtract(a1, ones)

In [None]:
#elementwise multiplication
a1*ones

In [None]:
a2

In [None]:
a2*a1
#takes a1 vector and multiplies it by each of the vectors
#in the a2 matrix

In [None]:
print(a1)
print(a2)
print(a3)

In [None]:
#a2*a3
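#unable to broadcast: shapes (2,3) and (2,3,3) are not compatible
#(compared from the last axis: 3 matches 3, but 2 does not match 3 and neither is 1)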

In [None]:
#elementwise multiplication with broadcasting: * and np.multiply are equivalent
#(returned as a tuple so both results are displayed, not only the last line)
a1*a3, np.multiply(a1,a3)

In [None]:
#how can we reshape a2 to be compatible with a3
#'how to reshape numpy array'


In [None]:
#elementwise division: the / operator and np.divide are equivalent
#(returned as a tuple so both results are displayed, not only the last line)
a1/ones, np.divide(a1,ones)

In [None]:
#floor division (divides, then rounds each quotient down)
#np.floor_divide is the function form of //; np.floor(a2) would only
#round a2 itself down without ever dividing by a1
a2//a1, np.floor_divide(a2, a1)

In [None]:
#normal (true) division: the / operator and np.divide are equivalent
#(returned as a tuple so both results are displayed, not only the last line)
a2/a1, np.divide(a2, a1)

In [None]:
#squaring each element: ** 2 and np.square are equivalent
#(returned as a tuple so both results are displayed, not only the last line)
print(a2)
a2 ** 2, np.square(a2)

In [None]:
#remainder after dividing each element by 2: % and np.mod are equivalent
#(returned as a tuple so both results are displayed, not only the last line)
a1 % 2, np.mod(a1, 2)

In [None]:
np.exp(a1)
#calculates the exponential of all elements
#e^x where each value of a1 is x and e is euler's number (2.718281...)

In [None]:
np.log(a1)
#The natural logarithm `log` is the inverse of the exponential function,
#so that `log(exp(x)) = x`. The natural logarithm is logarithm in base
#`e`.


#The natural logarithm of x is the power to which 
#e would have to be raised to equal x. 
#For example, ln 7.5 is 2.0149..., because e^2.0149... = 7.5. 
#The natural logarithm of e itself, ln e, is 1, 
#because e^1 = e, while the natural logarithm of 1 is 0, since e^0 = 1.
#2.718281828459 ** 0.69314718 = 2

### Aggregation

In [None]:
#aggregation is performing the same operation on a number of things

In [None]:
#python's builtin sum and np.sum give the same total here
#(returned as a tuple so both results are displayed, not only the last line)
sum(a1), np.sum(a1)
#use python methods on python data types
#use np methods on np data types

In [None]:
massive_array = np.random.random(100000)

In [None]:
%timeit sum(massive_array)
%timeit np.sum(massive_array)

In [None]:
np.mean(a2)
# (1+2+3.3+4+5+6.5)/6

In [None]:
np.max(a2), np.min(a2)

In [None]:
np.std(a2)
#std is a measure of how spread out a group
#of numbers is from the mean

#std = sqrt(var)

In [None]:
np.var(a2)
#variance = measure of average degree to which each
#number is different to the mean

#higher variance = wider range of numbers
#lower variance = lower range of numbers

In [None]:
np.sqrt(np.var(a2))
#this is the same as std

### Std and Variance

In [None]:
#both measure the spread of the data

In [None]:
# one widely spread sample and one tightly clustered sample, used to compare spread
high_var_array = np.array([1, 100, 200, 300, 4000, 5000])
low_var_array = np.arange(2, 11, 2)  # -> [2, 4, 6, 8, 10]

In [None]:
np.var(high_var_array), np.var(low_var_array)

In [None]:
np.std(high_var_array), np.std(low_var_array)
#on average a number in high_var_array is 2072 away from the mean
#in low_var_array any number is on average only 2.8 away from the mean

In [None]:
np.mean(high_var_array), np.mean(low_var_array)

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
#title and axis labels let the figure stand on its own;
#plt.show() renders it without dumping the raw return value of plt.hist
plt.hist(high_var_array)
plt.title("high_var_array")
plt.xlabel("value")
plt.ylabel("count")
plt.show()

In [None]:
#title and axis labels let the figure stand on its own;
#plt.show() renders it without dumping the raw return value of plt.hist
plt.hist(low_var_array)
plt.title("low_var_array")
plt.xlabel("value")
plt.ylabel("count")
plt.show()

### reshape and transpose

In [None]:
a2, a3

In [None]:
a2.shape, a3.shape

In [None]:
#a2*a3

In [None]:
a2.reshape(2,3,1)

In [None]:
a2_reshape = a2.reshape(2,3,1)

In [None]:
a2_reshape * a3

In [None]:
a2.reshape(2,1,3), a3, a2.reshape(2,1,3)*a3
#this reshape should take a2[0] times each element of a3[0]
#and a2[1] times each element of a3[1]

In [None]:
a2, a2.shape

In [None]:
a2.T, a2.T.shape

In [None]:
a3, a3.shape

In [None]:
a3.T, a3.T.shape

### Dot Product vs Element Wise

In [None]:
#when would we ever want to use a transpose? when we need to use the dot product!

In [None]:
# seed once, then draw two 5x3 integer matrices (values 0-9) one after the other
np.random.seed(0)
mat1 = np.random.randint(low=0, high=10, size=(5, 3))
mat2 = np.random.randint(low=0, high=10, size=(5, 3))
print(mat1, mat2, sep="\n")

In [None]:
np.multiply(mat1, mat2)
#elementwise multiplication

In [None]:
#np.dot(mat1,mat2)   errors out

![image.png](attachment:006b6641-fa13-4fc4-9e9c-904c2fd5939f.png)

![image.png](attachment:8b652294-618b-4897-becc-5402778ff140.png)

In [None]:
#numbers on the inside must match and numbers on the outside form the 
#new matrix size.

In [None]:
#number of columns in first must match number of rows in second or it will throw error.

In [None]:
mat1.shape, mat2.shape
#based on the rules above we would have to transpose mat2 to get a shape that works 

In [None]:
mat2_T = mat2.T
print(mat1.shape, mat2_T.shape)
#so now the inside numbers match and the resulting matrix will be 5x5

In [None]:
print(mat1)
print(mat2)
print(mat2_T)

In [None]:
mat1.dot(mat2_T)

In [None]:
#dot products are just another tool to find patterns between two different arrays of numbers