In [1943]:
import numpy as np

### Creating Numpy Arrays from Python Lists

In [1944]:
np.array([1, 2, 3, 4])

array([1, 2, 3, 4])

In [1945]:
np.array([3.14, 2, 3, 4])

array([3.14, 2.  , 3.  , 4.  ])

In [1946]:
np.array([1, 2, 3, 4], dtype=float)

array([1., 2., 3., 4.])

In [1947]:
# Build a 1-D array, then report its Python type, shape, element count and
# number of dimensions, one value per line.
a1 = np.array([1, 2, 3, 4])
print(type(a1), a1.shape, a1.size, a1.ndim, sep="\n")

<class 'numpy.ndarray'>
(4,)
4
1


In [1948]:
# Same inspection for a 2-D array: two rows of three elements each.
a2 = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
print(type(a2), a2.shape, a2.size, a2.ndim, sep="\n")

<class 'numpy.ndarray'>
(2, 3)
6
2


### Creating Numpy Arrays from Scratch

#### `zeros`

In [1949]:
np.zeros((2, 4))

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [1950]:
np.zeros((2, 4)).dtype

dtype('float64')

In [1951]:
np.zeros((2, 4), dtype=int)

array([[0, 0, 0, 0],
       [0, 0, 0, 0]])

#### `ones`

In [1952]:
np.ones((3, 5))

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [1953]:
np.ones((3, 5), dtype=int)

array([[1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1]])

#### `arange`

In [1954]:
np.arange(0, 20, 2)

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

#### `full`

In [1955]:
np.full((3, 5), 6.9)

array([[6.9, 6.9, 6.9, 6.9, 6.9],
       [6.9, 6.9, 6.9, 6.9, 6.9],
       [6.9, 6.9, 6.9, 6.9, 6.9]])

#### `linspace`

In [1956]:
np.linspace(0, 1, 5)

array([0.  , 0.25, 0.5 , 0.75, 1.  ])

#### `rand`

In [1957]:
# seed for reproducibility
np.random.seed(0)

In [1958]:
np.random.random((4, 4))

array([[0.5488135 , 0.71518937, 0.60276338, 0.54488318],
       [0.4236548 , 0.64589411, 0.43758721, 0.891773  ],
       [0.96366276, 0.38344152, 0.79172504, 0.52889492],
       [0.56804456, 0.92559664, 0.07103606, 0.0871293 ]])

In [1959]:
np.random.normal(0, 1, size=(3, 3))

array([[ 0.44386323,  0.33367433,  1.49407907],
       [-0.20515826,  0.3130677 , -0.85409574],
       [-2.55298982,  0.6536186 ,  0.8644362 ]])

In [1960]:
np.random.randint(0, 9, size=(3, 4)) # integers drawn from the half-open interval [0, 9): 0 is included, 9 is excluded

array([[7, 2, 0, 0],
       [4, 5, 5, 6],
       [8, 4, 1, 4]], dtype=int32)

In [1961]:
np.random.rand(4, 4)

array([[0.6818203 , 0.3595079 , 0.43703195, 0.6976312 ],
       [0.06022547, 0.66676672, 0.67063787, 0.21038256],
       [0.1289263 , 0.31542835, 0.36371077, 0.57019677],
       [0.43860151, 0.98837384, 0.10204481, 0.20887676]])

### Array Indexing & Slicing

#### `One-dimensional subarray`

In [1962]:
x1 = np.random.randint(0, 20, 6)
print(x1)

[ 3 12  4  8 14 15]


In [1963]:
x1[4], x1[0], x1[-1]

(np.int32(14), np.int32(3), np.int32(15))

#### `Multi-dimensional subarray`

In [1964]:
x2 = np.random.randint(0, 10, size=(3, 4))
print(x2)

[[4 3 7 5]
 [5 0 1 5]
 [9 3 0 5]]


In [1965]:
x2[1, 3]

np.int32(5)

In [1966]:
x2[1, 3] = 1000
print(x2)

[[   4    3    7    5]
 [   5    0    1 1000]
 [   9    3    0    5]]


#### `Slicing`
`x[start:stop:step]`

In [1967]:
x1

array([ 3, 12,  4,  8, 14, 15], dtype=int32)

In [1968]:
x1[0:3]

array([ 3, 12,  4], dtype=int32)

In [1969]:
x1[2:4]

array([4, 8], dtype=int32)

In [1970]:
# every other element: start and stop are omitted, so the slice walks the whole array with a step of 2
x1[::2]

array([ 3,  4, 14], dtype=int32)

In [1971]:
x2

array([[   4,    3,    7,    5],
       [   5,    0,    1, 1000],
       [   9,    3,    0,    5]], dtype=int32)

In [1972]:
# two rows, three columns
x2[:2, :3]

array([[4, 3, 7],
       [5, 0, 1]], dtype=int32)

In [1973]:
x2[:, :2]

array([[4, 3],
       [5, 0],
       [9, 3]], dtype=int32)

### Reshaping of Arrays & Transpose

#### `reshape`

In [1974]:
grid = np.arange(1, 10)
grid.shape

(9,)

In [1975]:
grid.reshape(3, 3)

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [1976]:
x = np.array([1, 2, 3])
x.shape

(3,)

In [1977]:
x.reshape(1, 3).shape

(1, 3)

#### `Transpose`

In [1978]:
x = np.array([[1, 2], [3, 4]])
print(x)

[[1 2]
 [3 4]]


In [1979]:
x.T

array([[1, 3],
       [2, 4]])

### Array Concatenation & Splitting

#### `concatenate`

In [1980]:
x = np.array([1, 2, 3])
y = np.array([3, 2, 1])

In [1981]:
np.concatenate((x, y)) # default axis=0; for 2-D inputs axis=0 joins along rows, axis=1 along columns

array([1, 2, 3, 3, 2, 1])

In [1982]:
grid = np.array([[1, 2, 3],
                  [4, 5, 6]])
print(grid)

[[1 2 3]
 [4 5 6]]


In [1983]:
np.concatenate((grid, grid), axis=0)

array([[1, 2, 3],
       [4, 5, 6],
       [1, 2, 3],
       [4, 5, 6]])

In [1984]:
np.concatenate((grid, grid), axis=1)

array([[1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6]])

#### `vstack(vertical stack)`

In [1985]:
x = np.array([1, 2, 3])
grid = np.array([[9, 8, 7], [6, 5, 4]])

In [1986]:
np.vstack((x, grid))

array([[1, 2, 3],
       [9, 8, 7],
       [6, 5, 4]])

#### `hstack(horizontal stack)`

In [1987]:
y = np.array([[99], [99]])
np.hstack((y, grid))

array([[99,  9,  8,  7],
       [99,  6,  5,  4]])

#### `Splitting of arrays`

In [1988]:
x = np.array([1, 2, 3, 99, 69, 3, 2, 1])

In [1989]:
# The split points [3, 5] cut x into three pieces: x[:3], x[3:5] and x[5:].
# NOTE: this rebinds x1 and x2, which held the arrays used in the indexing
# and slicing section earlier in the notebook.
x1, x2, x3 = np.split(x, [3, 5])
# End the cell on the tuple so the three pieces are actually displayed.
x1, x2, x3

### Broadcasting and Vectorized operations
Broadcasting is simply a set of rules for applying binary ufuncs (e.g., addition, subtraction, multiplication, division, etc.) on arrays of different sizes.
<p align="center">
  <img src="broadcasting.png" alt="example">
</p>


In [1990]:
a = np.arange(3) # [0, 1, 2]
a + 5 # Broadcasting

array([5, 6, 7])

In [1991]:
b = np.ones((3, 3))

In [1992]:
b.shape, a.shape

((3, 3), (3,))

In [1993]:
b + a

array([[1., 2., 3.],
       [1., 2., 3.],
       [1., 2., 3.]])

In [1994]:
a.reshape(3, 1) + a

array([[0, 1, 2],
       [1, 2, 3],
       [2, 3, 4]])

### Manipulating & Comparing Arrays

#### `Aggregation`

In [1995]:
list_number = [1, 2, 3]

In [1996]:
ll = np.array(list_number)

In [1997]:
sum(ll) # Python's built-in sum

np.int64(6)

In [1998]:
np.sum(ll) # Numpy sum

np.int64(6)

In [1999]:
# Create a massive Numpy array
massive_array = np.random.random(10000)

In [2000]:
%timeit sum(massive_array)
%timeit np.sum(massive_array)

673 μs ± 22.6 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
7.02 μs ± 1.62 μs per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [2001]:
np.max(massive_array)

np.float64(0.9999709462497284)

In [2002]:
np.min(massive_array)

np.float64(2.0027530115096503e-05)

#### `Math & Arithmetic`

In [2003]:
a = np.array([1, 2, 3])
b = np.array([3, 2, 1])

In [2004]:
np.add(a, b) # a + b

array([4, 4, 4])

In [2005]:
np.subtract(a, b) # a - b

array([-2,  0,  2])

In [2006]:
np.multiply(a, b) # a * b

array([3, 4, 3])

In [2007]:
np.divide(a, b) # a / b

array([0.33333333, 1.        , 3.        ])

In [2008]:
np.divmod(a, b) # returns the pair (a // b, a % b): quotients first, remainders second

(array([0, 1, 3]), array([1, 0, 0]))

In [2009]:
np.power(a, 2) # a ** 2

array([1, 4, 9])

In [2010]:
np.sqrt(a)

array([1.        , 1.41421356, 1.73205081])

In [2011]:
np.exp(a)

array([ 2.71828183,  7.3890561 , 20.08553692])

In [2012]:
np.log(a)

array([0.        , 0.69314718, 1.09861229])

In [2013]:
np.abs(a)

array([1, 2, 3])

In [2014]:
# np.clip() limits the values in an array to a specified range.
# Here a = [1, 2, 3] and the range is [epsilon, 1 - epsilon], so every element
# lies above the upper bound and is clipped to 1 - epsilon (displayed as 1.).
epsilon = 1e-15
np.clip(a, epsilon, 1 - epsilon)

array([1., 1., 1.])

### Sorting Arrays
By default, `np.sort()` uses a quicksort algorithm.

In [2015]:
x = np.array([2, 1, 4, 3, 5])
np.sort(x)

array([1, 2, 3, 4, 5])

In [2016]:
# a related function is argsort, which instead returns the indices of the sorted elements
np.argsort(x)

array([1, 0, 3, 2, 4])

#### `Sorting along rows or columns`
A useful feature of NumPy's sorting functions is the ability to sort along specific rows or columns of a multidimensional array using the `axis` argument.

In [2017]:
# Reseed the global generator so the matrix below is the same on every run.
np.random.seed(42)

# 4x5 matrix of integers drawn from [0, 10); used for the axis-wise sorts that follow.
MatA = np.random.randint(0, 10, size=(4, 5))

In [2018]:
MatA

array([[6, 3, 7, 4, 6],
       [9, 2, 6, 7, 4],
       [3, 7, 7, 2, 5],
       [4, 1, 7, 5, 1]], dtype=int32)

In [2019]:
np.sort(MatA, axis=0)

array([[3, 1, 6, 2, 1],
       [4, 2, 7, 4, 4],
       [6, 3, 7, 5, 5],
       [9, 7, 7, 7, 6]], dtype=int32)

In [2020]:
np.sort(MatA, axis=1)

array([[3, 4, 6, 6, 7],
       [2, 4, 6, 7, 9],
       [2, 3, 5, 7, 7],
       [1, 1, 4, 5, 7]], dtype=int32)

### Distribution

#### `Uniform Distribution`

In [2021]:
np.random.uniform(low=0.0, high=1.0, size=(3, 4))

array([[0.18182497, 0.18340451, 0.30424224, 0.52475643],
       [0.43194502, 0.29122914, 0.61185289, 0.13949386],
       [0.29214465, 0.36636184, 0.45606998, 0.78517596]])

#### `Normal Distribution`

In [2022]:
np.random.normal(loc=0.0, scale=1.0, size=(3, 4))

array([[ 0.0675282 , -1.42474819, -0.54438272,  0.11092259],
       [-1.15099358,  0.37569802, -0.60063869, -0.29169375],
       [-0.60170661,  1.85227818, -0.01349722, -1.05771093]])

#### `Exponential Distribution`

In [2023]:
np.random.exponential(3.0, size=(3, 4))

array([[ 0.10498116,  7.20126866,  0.89837333,  3.2587674 ],
       [ 1.12063974,  2.20233269,  2.37367139,  0.6131658 ],
       [10.4784214 ,  4.47673613,  8.41528326,  6.75645599]])

#### `Binomial Distribution`

In [2024]:
np.random.binomial(10, 0.5, size=5)

array([5, 7, 3, 4, 2], dtype=int32)

#### `Poisson Distribution`

In [2025]:
np.random.poisson(3.0, size=5)

array([2, 3, 2, 3, 4], dtype=int32)

### Statistics

In [2026]:
np.mean(massive_array)

np.float64(0.5014071673347639)

In [2027]:
np.var(massive_array)

np.float64(0.08264590341248054)

In [2028]:
np.std(massive_array)

np.float64(0.2874820053716068)

In [2029]:
# With a single 1-D variable np.cov returns a 0-d array holding that variable's variance.
# np.cov normalizes by N - 1 by default, which is why the value differs slightly
# from the np.var result (normalized by N) shown earlier.
np.cov(massive_array)

array(0.08265417)

In [2030]:
# Only one variable is passed, so there is nothing to correlate it against:
# the result is the correlation of the array with itself, i.e. 1.0.
np.corrcoef(massive_array)

np.float64(1.0)

### Linear Algebra

In [2031]:
# A is 3x3 and B is 3x2: A's column count matches B's row count, so the
# matrix product of A and B is defined and has shape (3, 2).
A = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])
B = np.array([
    [6, 5],
    [4, 3],
    [2, 1],
])

In [2032]:
print(A.shape)
print(B.shape)

(3, 3)
(3, 2)


In [2033]:
np.dot(A, B)

array([[20, 14],
       [56, 41],
       [92, 68]])

In [2034]:
B.T

array([[6, 4, 2],
       [5, 3, 1]])

In [2035]:
B.T @ A

array([[36, 48, 60],
       [24, 33, 42]])

In [2036]:
A = np.array([[1, 2], [3, 4]])

In [2037]:
np.linalg.inv(A)

array([[-2. ,  1. ],
       [ 1.5, -0.5]])

In [2038]:
np.linalg.det(A)

np.float64(-2.0000000000000004)

In [2039]:
# np.linalg.eig returns the eigenvalues together with a matrix whose columns
# are the corresponding eigenvectors; print the two one after the other.
eig_vals, eigvecs = np.linalg.eig(A)
print(eig_vals, eigvecs, sep="\n")

[-0.37228132  5.37228132]
[[-0.82456484 -0.41597356]
 [ 0.56576746 -0.90937671]]


### Dot Product Example

In [2040]:
import pandas as pd

In [2041]:
np.random.seed(0)

In [2042]:
sales_amounts = np.random.randint(20, size=(5, 3))
print(sales_amounts)

[[12 15  0]
 [ 3  3  7]
 [ 9 19 18]
 [ 4  6 12]
 [ 1  6  7]]


In [2043]:
weekly_sales = pd.DataFrame(sales_amounts, index=["Mon", "Tues", "Wed", "Thurs", "Fri"], 
                            columns=["Almond Butter", "Peanut Butter", "Cashew Butter"])

In [2044]:
print(weekly_sales)

       Almond Butter  Peanut Butter  Cashew Butter
Mon               12             15              0
Tues               3              3              7
Wed                9             19             18
Thurs              4              6             12
Fri                1              6              7


In [2045]:
# create a price array
prices = np.array([10, 8, 12])

In [2046]:
butter_prices = pd.DataFrame(prices.reshape(1, 3), index=["Price"], columns=["Almond Butter", "Peanut Butter", "Cashew Butter"])

In [2047]:
print(butter_prices)

       Almond Butter  Peanut Butter  Cashew Butter
Price             10              8             12


In [2048]:
weekly_sales.shape, butter_prices.shape

((5, 3), (1, 3))

In [2049]:
# Select the product columns by name before multiplying. This lines each sales
# column up with its matching price and keeps the cell re-runnable: once the
# "Total prices" column has been added to weekly_sales further down, the frame
# is (5, 4) and a plain np.dot(weekly_sales, butter_prices.T) would fail with a
# shape mismatch.
# (5, 3) . (3, 1) -> (5, 1): one total per day.
total_prices = np.dot(weekly_sales[butter_prices.columns], butter_prices.T)

In [2050]:
weekly_sales["Total prices"] = total_prices
print(weekly_sales)

       Almond Butter  Peanut Butter  Cashew Butter  Total prices
Mon               12             15              0           240
Tues               3              3              7           138
Wed                9             19             18           458
Thurs              4              6             12           232
Fri                1              6              7           142
