# NumPy

In [8]:
import numpy as np
import pandas as pd

## Array

### 1D array

In [12]:
np.array([1, 2, 3, 4])

array([1, 2, 3, 4])

In [14]:
np.array([3.14, 2, 3, 4])

array([3.14, 2.  , 3.  , 4.  ])

In [16]:
np.array([1, 2, 3, 4], dtype='float32')

array([1., 2., 3., 4.], dtype=float32)

In [18]:
a = np.array([1, 2, 3, 4])

In [20]:
type(a)

numpy.ndarray

# Shape is a tuple with one entry per axis; for this 1D array of 4 elements it is (4,).
a.shape

In [23]:
a.ndim

1

In [25]:
a.size

4

### 2D array

In [28]:
A = np.array([[1, 2, 3],
              [4, 5, 6]])

In [30]:
type(A)

numpy.ndarray

In [32]:
A.shape

(2, 3)

In [34]:
A.ndim

2

In [36]:
A.size

6

## Create arrays from scratch

### `zeros`

In [40]:
np.zeros(shape=(5, 4), dtype='int')

array([[0, 0, 0, 0],
       [0, 0, 0, 0],
       [0, 0, 0, 0],
       [0, 0, 0, 0],
       [0, 0, 0, 0]])

### `ones`

In [43]:
np.ones(shape=(3, 5), dtype='float')

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

### `arange`

In [46]:
np.arange(start=0, stop=20, step=2)

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

### `full`

In [49]:
np.full(shape=(3, 4), fill_value=5)

array([[5, 5, 5, 5],
       [5, 5, 5, 5],
       [5, 5, 5, 5]])

### `linspace`

In [52]:
np.linspace(start=0, stop=1, num=5) 

array([0.  , 0.25, 0.5 , 0.75, 1.  ])

## Random

In [55]:
np.random.random(size=(4, 4))

array([[0.64648659, 0.39916407, 0.47458849, 0.42674816],
       [0.48369852, 0.51834367, 0.95447673, 0.46007777],
       [0.59939595, 0.63916839, 0.42984701, 0.3330544 ],
       [0.83450549, 0.60320116, 0.23055349, 0.66388795]])

In [57]:
np.random.rand(4, 5)

array([[0.96056197, 0.79317438, 0.15360167, 0.9852552 , 0.84948692],
       [0.39657373, 0.25602684, 0.26381244, 0.49758712, 0.64804958],
       [0.60005754, 0.89529759, 0.67594534, 0.64733566, 0.93076364],
       [0.69532787, 0.27770411, 0.27327722, 0.87156613, 0.59559613]])

In [59]:
np.random.seed(123)
np.random.random(size=(4, 4))

array([[0.69646919, 0.28613933, 0.22685145, 0.55131477],
       [0.71946897, 0.42310646, 0.9807642 , 0.68482974],
       [0.4809319 , 0.39211752, 0.34317802, 0.72904971],
       [0.43857224, 0.0596779 , 0.39804426, 0.73799541]])

In [61]:
np.random.normal(loc=0, scale=1, size=(3, 3))

array([[-0.44398196, -0.43435128,  2.20593008],
       [ 2.18678609,  1.0040539 ,  0.3861864 ],
       [ 0.73736858,  1.49073203, -0.93583387]])

In [63]:
# `high` is exclusive, so every entry is drawn from {3, 4}.
np.random.randint(low=3, high=5, size=(4, 5))

array([[3, 3, 4, 3, 4],
       [3, 3, 4, 3, 4],
       [3, 4, 3, 3, 3],
       [3, 4, 3, 3, 4]], dtype=int32)

## Indexing & Slicing

### Indexing 1D array

In [67]:
# Fix the global seed so the sample below is the same on every run.
np.random.seed(10)
# Six integers drawn from [0, 20); explicit low/high is equivalent to randint(20, ...).
x = np.random.randint(low=0, high=20, size=6)

In [69]:
x

array([ 9,  4, 15,  0, 17, 16], dtype=int32)

In [71]:
x[0], x[4], x[-1]

(np.int32(9), np.int32(17), np.int32(16))

### Indexing 2D array

In [74]:
# Re-seed so the 2D example is reproducible on its own.
np.random.seed(10)
# 3x4 grid of integers drawn from [0, 10).
X = np.random.randint(low=0, high=10, size=(3, 4))

In [76]:
X

array([[9, 4, 0, 1],
       [9, 0, 1, 8],
       [9, 0, 8, 6]], dtype=int32)

In [78]:
X[0, 0], X[1, 0], X[2, 2]

(np.int32(9), np.int32(9), np.int32(8))

### Slicing
`x[start:stop:step]`

In [81]:
x

array([ 9,  4, 15,  0, 17, 16], dtype=int32)

In [83]:
x[0:3]

array([ 9,  4, 15], dtype=int32)

In [85]:
x[2:4]

array([15,  0], dtype=int32)

In [87]:
x[::2]

array([ 9, 15, 17], dtype=int32)

In [89]:
X

array([[9, 4, 0, 1],
       [9, 0, 1, 8],
       [9, 0, 8, 6]], dtype=int32)

In [91]:
X[:2]

array([[9, 4, 0, 1],
       [9, 0, 1, 8]], dtype=int32)

In [93]:
X[:2, :3]

array([[9, 4, 0],
       [9, 0, 1]], dtype=int32)

In [95]:
X[:, :2]

array([[9, 4],
       [9, 0],
       [9, 0]], dtype=int32)

## Reshaping

In [103]:
grid = np.arange(1, 10)

In [111]:
grid

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [107]:
grid.shape

(9,)

In [109]:
grid.reshape((3, 3))

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [113]:
v = np.array([1, 2, 3])

In [119]:
v

array([1, 2, 3])

In [115]:
v.shape

(3,)

In [125]:
# Turn the flat (3,) vector into an explicit 1x3 row vector.
v = v.reshape((1, 3))

In [127]:
v.shape

(1, 3)

## Transpose

In [133]:
x = np.array([[1., 2.], [3., 4.]])

In [135]:
x

array([[1., 2.],
       [3., 4.]])

In [137]:
x.T

array([[1., 3.],
       [2., 4.]])

## Concatenation

In [147]:
x = np.array([1, 2, 3])
y = np.array([4, 5, 6])

In [153]:
np.concatenate((x, y))

array([1, 2, 3, 4, 5, 6])

In [165]:
# 2x3 matrix used by the concatenate / vstack / hstack examples that follow.
A = np.array([
    [1, 2, 3],
    [4, 5, 6],
])

In [167]:
# axis=1 joins along the columns: two (2, 3) blocks become one (2, 6) array.
np.concatenate((A, A), axis=1)

array([[1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6]])

In [187]:
# vstack stacks row-wise: the (3,) vector x becomes a new first row above A.
np.vstack((x, A))

array([[1, 2, 3],
       [1, 2, 3],
       [4, 5, 6]])

In [197]:
B = np.array([[99],
              [99]])

In [199]:
B

array([[99],
       [99]])

In [201]:
# hstack joins column-wise: B (2, 1) is placed to the left of A (2, 3).
np.hstack((B, A))

array([[99,  1,  2,  3],
       [99,  4,  5,  6]])

## Splitting

In [204]:
x = np.array([1, 2, 3, 5, 667, 99, 3, 1])

In [216]:
# An integer asks for that many equal-sized pieces (8 elements -> 4 arrays of 2).
np.split(x, 4)

[array([1, 2]), array([3, 5]), array([667,  99]), array([3, 1])]

In [206]:
# A list gives the split points: the pieces are x[:3], x[3:5] and x[5:].
np.split(x, [3, 5])

[array([1, 2, 3]), array([  5, 667]), array([99,  3,  1])]

## Broadcasting and Vectorized operations
![image-broadcasting](https://jakevdp.github.io/PythonDataScienceHandbook/figures/02.05-broadcasting.png)

In [221]:
a = np.arange(3)

In [223]:
a

array([0, 1, 2])

In [225]:
a + 5

array([5, 6, 7])

In [239]:
A = np.ones((3, 3))

In [241]:
A

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [243]:
# Broadcasting: the (3,) vector a is added to every row of the (3, 3) matrix A.
A + a

array([[1., 2., 3.],
       [1., 2., 3.],
       [1., 2., 3.]])

In [273]:
# A flat (3,) vector and a (3, 1) column for the two-way broadcasting demo.
x = np.array([0, 1, 2])
Y = np.array([[0], [1], [2]])

In [275]:
x, Y

(array([0, 1, 2]),
 array([[0],
        [1],
        [2]]))

In [277]:
x + Y

array([[0, 1, 2],
       [1, 2, 3],
       [2, 3, 4]])

In [279]:
x * Y

array([[0, 0, 0],
       [0, 1, 2],
       [0, 2, 4]])

## Manipulating & Comparing

### Aggregation

In [314]:
# Seed the global generator first so the aggregates computed from this sample
# (sum, mean, max, min) are reproducible across kernel restarts; re-run the
# following cells to refresh their saved outputs.
np.random.seed(0)
massive_array = np.random.random(100_000)

In [317]:
%timeit sum(massive_array) # Python sum()

6.16 ms ± 349 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [319]:
%timeit np.sum(massive_array) # Numpy sum()

28.9 μs ± 869 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [321]:
np.mean(massive_array)

np.float64(0.5006567680250298)

In [325]:
np.max(massive_array)

np.float64(0.9999991190943784)

In [327]:
np.min(massive_array)

np.float64(8.590803772801081e-07)

## Standard Deviation & Variance

Standard deviation is a measure of how spread out numbers are.

The formula is simple: it is the square root of the variance.

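Variance is the average of the squared differences from the mean: $\sigma^2 = \frac{1}{N}\sum_{i=1}^{N}(x_i - \mu)^2$. By default `np.var` and `np.std` compute this population form (`ddof=0`).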

In [336]:
dog_height = np.array([600, 470, 170, 430, 300])

In [338]:
np.std(dog_height)

np.float64(147.32277488562318)

In [342]:
np.var(dog_height)

np.float64(21704.0)

In [346]:
np.sqrt(np.var(dog_height))

np.float64(147.32277488562318)

## Sorting

In [349]:
np.sort(dog_height)

array([170, 300, 430, 470, 600])

In [351]:
# Indices that would sort the array: dog_height[2] (170) is the smallest, dog_height[0] (600) the largest.
np.argsort(dog_height)

array([2, 4, 3, 1, 0])

In [355]:
np.random.seed(42)

In [357]:
A = np.random.randint(0, 10, size=(4, 6))

In [366]:
A

array([[6, 3, 7, 4, 6, 9],
       [2, 6, 7, 4, 3, 7],
       [7, 2, 5, 4, 1, 7],
       [5, 1, 4, 0, 9, 5]], dtype=int32)

In [362]:
# axis=0 sorts down the rows, i.e. each column is sorted independently.
np.sort(A, axis=0)

array([[2, 1, 4, 0, 1, 5],
       [5, 2, 5, 4, 3, 7],
       [6, 3, 7, 4, 6, 7],
       [7, 6, 7, 4, 9, 9]], dtype=int32)

In [364]:
# axis=1 sorts across the columns, i.e. each row is sorted independently.
np.sort(A, axis=1)

array([[3, 4, 6, 6, 7, 9],
       [2, 3, 4, 6, 7, 7],
       [1, 2, 4, 5, 7, 7],
       [0, 1, 4, 5, 5, 9]], dtype=int32)

## Linear Algebra

In [373]:
# A is 3x3 and B is 3x2: the operands for the matrix-product examples below.
A = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])
B = np.array([
    [6, 5],
    [4, 3],
    [2, 1],
])

In [375]:
A, B

(array([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]]),
 array([[6, 5],
        [4, 3],
        [2, 1]]))

In [379]:
# A (3x3) dot B (3x2) -> (3x2)
# Entry [0, 0] is row 0 of A times column 0 of B:
# 1*6 + 2*4 + 3*2 = 20
# Entry [0, 1] is row 0 of A times column 1 of B:
# 1*5 + 2*3 + 3*1 = 14
# ...

In [377]:
A.dot(B)

array([[20, 14],
       [56, 41],
       [92, 68]])

In [387]:
# B (3x2) dot A (3x3) -> not allowed: the inner dimensions differ (2 != 3)
# B.T (2x3) dot A (3x3) -> (2x3)

In [385]:
B.T.dot(A)

array([[36, 48, 60],
       [24, 33, 42]])

### Dot product example

In [394]:
# Seeded so the example sales figures are identical on every run.
np.random.seed(0)
# 5 days x 3 products, each count drawn from [0, 20).
sales_amounts = np.random.randint(low=0, high=20, size=(5, 3))

In [400]:
sales_amounts

array([[12, 15,  0],
       [ 3,  3,  7],
       [ 9, 19, 18],
       [ 4,  6, 12],
       [ 1,  6,  7]], dtype=int32)

In [432]:
# Label the sales matrix: one row per weekday, one column per product.
# (pandas is imported with the other imports at the top of the notebook.)
# NOTE(review): 'Thus' looks like a typo for 'Thurs'; left unchanged here so the
# saved outputs below still match -- confirm and fix together with a re-run.
weekly_sales = pd.DataFrame(
    sales_amounts,
    index=['Mon', 'Tues', 'Wed', 'Thus', 'Fri'],
    columns=['Almond Butter', 'Peanut Butter', 'Cashew Butter'],
)

In [406]:
weekly_sales

Unnamed: 0,Almond Butter,Peanut Butter,Cashew Butter
Mon,12,15,0
Tues,3,3,7
Wed,9,19,18
Thus,4,6,12
Fri,1,6,7


In [412]:
# Unit prices as a single 1x3 row (one entry per product).
prices = np.array([[10., 8., 12.]])

In [418]:
# Wrap the price row in a DataFrame with a single 'Price' row and one column per product.
butter_prices = pd.DataFrame(
    prices,
    index=['Price'],
    columns=['Almond Butter', 'Peanut Butter', 'Cashew Butter'],
)

In [416]:
butter_prices

Unnamed: 0,Almond Butter,Peanut Butter,Cashew Butter
Price,10.0,8.0,12.0


In [426]:
weekly_sales.shape, butter_prices.shape

((5, 3), (1, 3))

In [428]:
# (5x3 sales) dot (3x1 prices) -> 5x1 revenue per day.
# Selecting the product columns explicitly keeps this cell re-runnable after the
# 'Revenue' column has been added to weekly_sales; without the selection,
# DataFrame.dot raises "matrices are not aligned" on a second run.
revenue = weekly_sales[butter_prices.columns].dot(butter_prices.T)

In [430]:
revenue

Unnamed: 0,Price
Mon,240.0
Tues,138.0
Wed,458.0
Thus,232.0
Fri,142.0


In [434]:
# Attach the daily revenue as a new column; this mutates weekly_sales in place.
weekly_sales['Revenue'] = revenue

In [436]:
weekly_sales

Unnamed: 0,Almond Butter,Peanut Butter,Cashew Butter,Revenue
Mon,12,15,0,240.0
Tues,3,3,7,138.0
Wed,9,19,18,458.0
Thus,4,6,12,232.0
Fri,1,6,7,142.0
