In [7]:
import numpy as np

## Data Types & Attributes

In [8]:
a1 = np.array([1,2,3])
a1

array([1, 2, 3])

In [9]:
a2 = np.array([[1,2,3], [4,5,6]])
a2

array([[1, 2, 3],
       [4, 5, 6]])

In [10]:
a3 = np.array([[[1,2,3], [4,5,6]],
              [[7,8,9], [10,11,12]]])

In [11]:
a3

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [12]:
a1.size, a2.size, a3.size

(3, 6, 12)

In [13]:
a1.dtype

dtype('int64')

In [14]:
a1.ndim, a2.ndim, a3.ndim

(1, 2, 3)

In [15]:
type(a1)

numpy.ndarray

In [16]:
# Wrap the 1-D and 2-D NumPy arrays in pandas DataFrames.
# With no labels supplied, pandas assigns default integer row/column labels.

import pandas as pd

df1, df2 = pd.DataFrame(a1), pd.DataFrame(a2)

In [17]:
df1

Unnamed: 0,0
0,1
1,2
2,3


In [18]:
df2

Unnamed: 0,0,1,2
0,1,2,3
1,4,5,6


## Arrays

In [19]:
# 3x3 integer matrix in which every entry is 1
ones = np.ones(shape=(3, 3), dtype=int)
ones

array([[1, 1, 1],
       [1, 1, 1],
       [1, 1, 1]])

In [20]:
# 2x3 integer matrix in which every entry is 0
zeros = np.zeros(shape=(2, 3), dtype=int)
zeros

array([[0, 0, 0],
       [0, 0, 0]])

In [21]:
range_array = np.arange(0, 10, 2)
range_array

array([0, 2, 4, 6, 8])

In [22]:
randomint_array = np.random.randint(0, 10, size=(2,3))
randomint_array

array([[9, 9, 6],
       [9, 9, 3]])

In [23]:
randomfl_array = np.random.random(size=(3,2))
randomfl_array

array([[0.5034642 , 0.33392643],
       [0.61114605, 0.16333669],
       [0.58269393, 0.82590538]])

In [24]:
randomfl_array2 = np.random.rand(3, 2)
randomfl_array2

array([[0.52625167, 0.74519558],
       [0.25791657, 0.70214676],
       [0.99847857, 0.13027349]])

In [25]:
# Random seeds: seeding the global generator makes the draw below reproducible,
# so re-running this cell always yields the same "random" integers.
np.random.seed(4)
randomint_array = np.random.randint(low=0, high=10, size=(2, 3))
randomint_array

array([[7, 5, 1],
       [8, 7, 8]])

## Viewing & Slicing Arrays

In [26]:
randomint_array = np.random.randint(0, 10, size=(4,4))
randomint_array

array([[2, 9, 7, 7],
       [7, 9, 8, 4],
       [2, 6, 4, 3],
       [0, 7, 5, 5]])

In [27]:
np.unique(randomint_array)

array([0, 2, 3, 4, 5, 6, 7, 8, 9])

In [28]:
a1

array([1, 2, 3])

In [29]:
a1[0]

1

In [30]:
a2

array([[1, 2, 3],
       [4, 5, 6]])

In [31]:
a2[1]

array([4, 5, 6])

In [32]:
a3

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [33]:
a3[0]

array([[1, 2, 3],
       [4, 5, 6]])

In [34]:
a3[ :1, :1]

array([[[1, 2, 3]]])

In [35]:
a3[ :1, :2, :1]

array([[[1],
        [4]]])

## Arithmetic

In [36]:
a1

array([1, 2, 3])

In [37]:
a1 * ones

array([[1, 2, 3],
       [1, 2, 3],
       [1, 2, 3]])

In [38]:
ones

array([[1, 1, 1],
       [1, 1, 1],
       [1, 1, 1]])

In [39]:
ones * a1

array([[1, 2, 3],
       [1, 2, 3],
       [1, 2, 3]])

In [40]:
a1 * 2

array([2, 4, 6])

In [41]:
a1 + ones

array([[2, 3, 4],
       [2, 3, 4],
       [2, 3, 4]])

In [42]:
a1 + a2

array([[2, 4, 6],
       [5, 7, 9]])

In [43]:
a2

array([[1, 2, 3],
       [4, 5, 6]])

In [44]:
a3

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [45]:
a3.shape

(2, 2, 3)

In [46]:
# Redefine a3 as two 3x3 blocks holding the integers 1..18 (shape (2, 3, 3))
a3 = np.arange(1, 19).reshape(2, 3, 3)

a3

array([[[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9]],

       [[10, 11, 12],
        [13, 14, 15],
        [16, 17, 18]]])

In [47]:
a3.shape

(2, 3, 3)

In [48]:
a2.shape

(2, 3)

In [49]:
# a2 * a3 no longer works because the shapes (2,3) and (2,3,3) cannot be broadcast together
# broadcasting compares the shapes from the last dimension backwards: 3 and 3 match, but the next pair (2 and 3) does not
# each pair of dimensions must either be equal or one of them must be 1, so a2 could also be reshaped to (2,1,3) or (2,3,1)
# the option used below is to reshape a3 into (3,2,3), so that its last two dimensions equal a2's shape (2,3)
# a reshape is only possible when the total number of elements stays the same, i.e. 3*2*3 = 2*3*3 = 18

a2

array([[1, 2, 3],
       [4, 5, 6]])

In [50]:
a3.reshape(-1) # flattens an array into 1D

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18])

In [51]:
a3.reshape(3,2,3)

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]],

       [[13, 14, 15],
        [16, 17, 18]]])

In [52]:
a2 * a3.reshape(3,2,3) # now works after reshaping

array([[[  1,   4,   9],
        [ 16,  25,  36]],

       [[  7,  16,  27],
        [ 40,  55,  72]],

       [[ 13,  28,  45],
        [ 64,  85, 108]]])

In [53]:
a1

array([1, 2, 3])

In [54]:
a1 / 2

array([0.5, 1. , 1.5])

In [55]:
a1 % 2

array([1, 0, 1])

In [56]:
a1 // 2 # floor division i.e. rounds each quotient down to the nearest whole number (towards negative infinity)

array([0, 1, 1])

In [57]:
a1 ** 2 # powers

array([1, 4, 9])

In [58]:
np.exp(a1) # e^n

array([ 2.71828183,  7.3890561 , 20.08553692])

In [59]:
np.log(a1)

array([0.        , 0.69314718, 1.09861229])

## Aggregation

In [60]:
# Normal python lists

l1 = [1,2,3]
type(l1)

list

In [61]:
sum(l1)

6

In [62]:
a1

array([1, 2, 3])

In [63]:
np.sum(a1)

6

In [64]:
a1.sum()

6

In [65]:
# Rule of thumb: use NumPy's sum on NumPy arrays and Python's built-in sum on plain Python lists.

# One million uniform random floats drawn from [0, 1)
large_dataset = np.random.random(size=1_000_000)
large_dataset

array([0.81194775, 0.61252534, 0.26647378, ..., 0.63898488, 0.0994288 ,
       0.55476681])

In [66]:
# that's because numpy's sum is much faster and more efficient on large numpy arrays

%timeit np.sum(large_dataset)
%timeit sum(large_dataset)

187 µs ± 1.29 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)
39 ms ± 110 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [67]:
# stats with numpy

a2

array([[1, 2, 3],
       [4, 5, 6]])

In [68]:
a2.std()

1.707825127659933

In [69]:
a2.var()

2.9166666666666665

In [70]:
a2.mean()

3.5

In [71]:
a2.max()

6

In [72]:
a2.min()

1

## Transpose & Reshaping

In [73]:
# Rules for broadcasting - each pair of dimensions needs to be equal, or one of the two has to be 1
# keep in mind that when broadcasting, it compares the shapes starting at the end
# so (2,3,3) * (1,2,3) won't work: the last dimensions match (3 and 3), but the next pair is 3 and 2, which are not equal and neither is 1
# but (2,3,3) * (2,1,3) will work, and so will (2,3,3) * (2,3,1)
# separately, when using reshape make sure the new shape holds the same number of elements (multiply all the shape numbers to check)

In [74]:
a2.shape, a2

((2, 3),
 array([[1, 2, 3],
        [4, 5, 6]]))

In [75]:
a3.shape, a3

((2, 3, 3),
 array([[[ 1,  2,  3],
         [ 4,  5,  6],
         [ 7,  8,  9]],
 
        [[10, 11, 12],
         [13, 14, 15],
         [16, 17, 18]]]))

In [76]:
a2.reshape(2,3,1)

array([[[1],
        [2],
        [3]],

       [[4],
        [5],
        [6]]])

In [77]:
a3 * a2.reshape(2,1,3)

array([[[  1,   4,   9],
        [  4,  10,  18],
        [  7,  16,  27]],

       [[ 40,  55,  72],
        [ 52,  70,  90],
        [ 64,  85, 108]]])

In [78]:
# Transpose 

a2.shape, a2.T.shape, a2.T

((2, 3),
 (3, 2),
 array([[1, 4],
        [2, 5],
        [3, 6]]))

## Dot Product

In [79]:
# For 2-D arrays np.dot performs matrix multiplication, not element-wise multiplication.

np.random.seed(21)

# Two 8x4 matrices of integers drawn from [0, 25); the seed above fixes their values.
rand_matrix1 = np.random.randint(low=0, high=25, size=(8, 4))
rand_matrix2 = np.random.randint(low=0, high=25, size=(8, 4))

rand_matrix1

array([[ 9, 15, 24,  4],
       [16, 16, 24,  3],
       [ 2, 17, 10,  8],
       [14, 12,  9, 22],
       [16, 14,  4,  6],
       [20, 20, 23, 12],
       [24,  9,  5,  6],
       [ 5, 18,  6, 18]])

In [80]:
rand_matrix2

array([[24, 12,  2, 16],
       [21,  5, 16,  3],
       [16, 24,  1, 12],
       [16, 18, 15,  6],
       [20,  9, 16, 13],
       [ 0,  5,  5, 11],
       [22,  3,  4, 12],
       [12,  6,  0,  0]])

In [81]:
rand_matrix1.shape, rand_matrix2.shape

((8, 4), (8, 4))

In [82]:
# the inner dimensions of (8,4) x (8,4) are 4 and 8, which are not equal, so we transpose the first matrix to get (4,8) x (8,4) for the dot product i.e. matrix multiplication

rand_matrix1.T.shape, rand_matrix2.shape

((4, 8), (8, 4))

In [83]:
np.dot(rand_matrix1.T, rand_matrix2)

array([[1716,  834,  938, 1016],
       [1854, 1245,  843, 1074],
       [1646, 1012,  776,  995],
       [1107,  891,  574,  583]])

## Applications e.g. Price and Sales

In [84]:
# Online protein bar business: a 7x4 matrix of randomly generated unit sales

np.random.seed(10)

# Integers drawn from [0, 40); the seed above keeps the table reproducible.
sales = np.random.randint(low=0, high=40, size=(7, 4))
sales

array([[ 9, 36, 15,  0],
       [28, 25, 29, 29],
       [ 8,  9,  0, 36],
       [16, 36, 11, 24],
       [33,  8, 36, 14],
       [13,  5, 13, 25],
       [13, 28, 22, 30]])

In [85]:
# Label the raw sales matrix: one row per weekday, one column per flavour.
weekly_sales_df = pd.DataFrame(
    data=sales,
    columns=["Peanut Butter", "Almond Coconut", "Choco Peanut Butter", "Blueberry Almond"],
    index=["Mon", "Tue", "Wed", "Thurs", "Fri", "Sat", "Sun"],
)

weekly_sales_df

Unnamed: 0,Peanut Butter,Almond Coconut,Choco Peanut Butter,Blueberry Almond
Mon,9,36,15,0
Tue,28,25,29,29
Wed,8,9,0,36
Thurs,16,36,11,24
Fri,33,8,36,14
Sat,13,5,13,25
Sun,13,28,22,30


In [86]:
# Prices for each flavor

prices = np.array([3.99, 2.99, 6.99, 4.99])
prices.shape

(4,)

In [87]:
# Present the 1-D price vector as a single-row table with one column per flavour.
prices_df = pd.DataFrame(
    data=prices[np.newaxis, :],  # add a leading axis: shape (4,) -> (1, 4)
    columns=["Peanut Butter", "Almond Coconut", "Choco Peanut Butter", "Blueberry Almond"],
    index=["Price"],
)

prices_df

Unnamed: 0,Peanut Butter,Almond Coconut,Choco Peanut Butter,Blueberry Almond
Price,3.99,2.99,6.99,4.99


In [88]:
# Calculating total sales

prices.shape, sales.shape

((4,), (7, 4))

In [89]:
# Revenue per day: multiply the (7, 4) units-sold matrix by the (4,) price vector,
# giving one total per row of `sales` (shape (7,)).
# `prices` is 1-D, so transposing it is a no-op; no transposes are needed here.
total_sales = np.dot(sales, prices)

In [90]:
total_sales

array([248.4 , 533.89, 238.47, 368.13, 477.09, 282.44, 439.07])

In [91]:
weekly_sales_df["Total Sales"] = total_sales

In [92]:
weekly_sales_df

Unnamed: 0,Peanut Butter,Almond Coconut,Choco Peanut Butter,Blueberry Almond,Total Sales
Mon,9,36,15,0,248.4
Tue,28,25,29,29,533.89
Wed,8,9,0,36,238.47
Thurs,16,36,11,24,368.13
Fri,33,8,36,14,477.09
Sat,13,5,13,25,282.44
Sun,13,28,22,30,439.07


## Comparison Operators

In [93]:
a1

array([1, 2, 3])

In [94]:
a2

array([[1, 2, 3],
       [4, 5, 6]])

In [95]:
a1 == a2

array([[ True,  True,  True],
       [False, False, False]])

In [96]:
a1 > a2

array([[False, False, False],
       [False, False, False]])

In [97]:
a1 < 5

array([ True,  True,  True])

## Sorting

In [98]:
random_array = np.random.randint(10, size=(2,3))
random_array

array([[9, 1, 9],
       [4, 2, 6]])

In [99]:
np.sort(random_array)

array([[1, 9, 9],
       [2, 4, 6]])

In [100]:
random_array

array([[9, 1, 9],
       [4, 2, 6]])

In [101]:
# argsort returns the indices that would sort each row, i.e. [1, 0, 2] means the value at index 1 is the lowest, then index 0, and index 2 is the highest
np.argsort(random_array)

array([[1, 0, 2],
       [1, 0, 2]])

In [102]:
random_array

array([[9, 1, 9],
       [4, 2, 6]])

In [103]:
np.argmax(random_array, axis=0)

array([0, 1, 0])

In [104]:
np.argmax(random_array, axis=1)

array([0, 2])

In [105]:
np.argmin(random_array) # index of the minimum value

1

In [106]:
np.argmax(random_array)

0

In [107]:
np.argmin(random_array, axis=0)

array([1, 0, 1])

## Converting Images into numpy arrays and numbers

In [113]:
import matplotlib.image as mpimg

![](images/panda.png)

In [115]:
panda_array = mpimg.imread("images/panda.png")

In [249]:
type(panda_array)

numpy.ndarray

In [250]:
panda_array.shape, panda_array.size, panda_array.ndim

((2330, 3500, 3), 24465000, 3)

In [251]:
panda_array[:3]

array([[[0.05490196, 0.10588235, 0.06666667],
        [0.05490196, 0.10588235, 0.06666667],
        [0.05490196, 0.10588235, 0.06666667],
        ...,
        [0.16470589, 0.12941177, 0.09411765],
        [0.16470589, 0.12941177, 0.09411765],
        [0.16470589, 0.12941177, 0.09411765]],

       [[0.05490196, 0.10588235, 0.06666667],
        [0.05490196, 0.10588235, 0.06666667],
        [0.05490196, 0.10588235, 0.06666667],
        ...,
        [0.16470589, 0.12941177, 0.09411765],
        [0.16470589, 0.12941177, 0.09411765],
        [0.16470589, 0.12941177, 0.09411765]],

       [[0.05490196, 0.10588235, 0.06666667],
        [0.05490196, 0.10588235, 0.06666667],
        [0.05490196, 0.10588235, 0.06666667],
        ...,
        [0.16470589, 0.12941177, 0.09411765],
        [0.16470589, 0.12941177, 0.09411765],
        [0.16470589, 0.12941177, 0.09411765]]], dtype=float32)