In [1]:
import numpy as np
import pandas as pd

## Creating Numpy Arrays from Python Lists

In [2]:
np.array([1,2,3,4])

array([1, 2, 3, 4])

In [3]:
np.array([1.2, 2, 3, 4])

array([1.2, 2. , 3. , 4. ])

In [4]:
a1 = np.array([1.2, 2, 3, 4])

In [5]:
type(a1)

numpy.ndarray

In [6]:
np.array([1.2, 2, 3],)

array([1.2, 2. , 3. ])

In [9]:
a2 = np.array([[1, 2, 3],
         [4, 5, 6]])

In [10]:
type(a2)

numpy.ndarray

In [14]:
a2.shape # (rows, columns)

(2, 3)

In [15]:
a2.ndim # number of array dimensions

2

In [16]:
a2.dtype

dtype('int32')

In [17]:
a2.size # total number of elements

6

## Creating Numpy Arrays from Scratch

### zeros, ones, full, arange, linspace  (shift + tab to see detail)

In [18]:
np.zeros([2,4]) # shift + tab to see detail

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [19]:
np.zeros([2,4], dtype = int)

array([[0, 0, 0, 0],
       [0, 0, 0, 0]])

In [20]:
np.ones([3,5], dtype = float)

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [21]:
# Create an array filled with a linear sequence:
# starting at 0, stepping by 2, and stopping before 20 (the stop value is excluded)
# (this is similar to the built-in range() function)
np.arange(0,20,2)

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [23]:
np.full((3,5), 6.9)

array([[6.9, 6.9, 6.9, 6.9, 6.9],
       [6.9, 6.9, 6.9, 6.9, 6.9],
       [6.9, 6.9, 6.9, 6.9, 6.9]])

In [26]:
# Create an array of five values evenly spaced between 0 and 1 (both endpoints included)
np.linspace(0,1,5)

array([0.  , 0.25, 0.5 , 0.75, 1.  ])

### random

In [27]:
np.random.random((4,4))

array([[0.73275211, 0.19408752, 0.34326006, 0.50539728],
       [0.54713573, 0.19524403, 0.55596378, 0.37467769],
       [0.91517317, 0.12047444, 0.26798447, 0.03088734],
       [0.68316952, 0.23860736, 0.54065711, 0.30560904]])

In [28]:
np.random.normal(0,1, (3,3))

array([[-0.31762738,  0.83964985,  1.8078128 ],
       [ 0.74651888, -1.64854618,  1.69362519],
       [-0.87795347, -0.97041883, -0.82402123]])

In [30]:
np.random.randint(0,10, (4,5))

array([[0, 7, 9, 8, 5],
       [5, 0, 9, 8, 9],
       [3, 1, 4, 0, 8],
       [5, 3, 4, 1, 9]])

## Array indexing & Slicing

### One-dimensional subarray

In [31]:
x1 = np.random.randint(20, size=6)

In [32]:
x1

array([ 5, 12,  6,  2,  5, 19])

### Multi-dimensional Array

In [33]:
x2 = np.random.randint(10, size=(3,4))

In [34]:
x2

array([[5, 8, 8, 5],
       [1, 5, 0, 2],
       [0, 2, 5, 6]])

### Slicing 

#### x[start:stop:step]

In [35]:
x1

array([ 5, 12,  6,  2,  5, 19])

In [36]:
x1[0:3]

array([ 5, 12,  6])

In [37]:
x2

array([[5, 8, 8, 5],
       [1, 5, 0, 2],
       [0, 2, 5, 6]])

In [38]:
x2[:2,:3]

array([[5, 8, 8],
       [1, 5, 0]])

## Reshaping of Arrays & Transpose

In [40]:
grid = np.arange(1,10)

In [41]:
grid.shape

(9,)

In [42]:
grid.reshape((3,3))

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [44]:
x = np.array([[1.,2.],
            [3.,4.]])

In [45]:
x

array([[1., 2.],
       [3., 4.]])

In [46]:
x.T # Transpose

array([[1., 3.],
       [2., 4.]])

## Array Concatenation and Splitting

In [47]:
x = np.array([1,2,3])
y = np.array([3,2,1])

In [49]:
np.concatenate((x,y)) # Join the two arrays end to end

array([1, 2, 3, 3, 2, 1])

In [50]:
grid = np.array([[1,2,3],
                [4,5,6]])

In [51]:
grid

array([[1, 2, 3],
       [4, 5, 6]])

In [52]:
np.concatenate((grid, grid)) # axis=0 by default -> joins along rows, i.e. stacks vertically

array([[1, 2, 3],
       [4, 5, 6],
       [1, 2, 3],
       [4, 5, 6]])

In [53]:
np.concatenate((grid, grid), axis = 1) # axis=1 -> joins along columns, i.e. side by side

array([[1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6]])

In [54]:
# vstack -> vertical stack
x = np.array([1,2,3])

grid = np.array([[9,8,7],
                [6,5,4]])

In [55]:
np.vstack((x, grid))

array([[1, 2, 3],
       [9, 8, 7],
       [6, 5, 4]])

In [57]:
# hstack -> horizontal stack
y = np.array([[99],
             [99]])
np.hstack((y, grid))

array([[99,  9,  8,  7],
       [99,  6,  5,  4]])

## Splitting of arrays

In [58]:
x = np.array([1,2,3,99,69,3,2,1])

In [60]:
x1, x2, x3 = np.split(x, [3,5])

In [61]:
x1

array([1, 2, 3])

In [62]:
x2

array([99, 69])

In [65]:
x3

array([3, 2, 1])

## Broadcasting and Vectorized operations
Broadcasting is simply a set of rules for applying binary ufuncs (e.g., addition, subtraction, multiplication, etc.) on arrays of different sizes.

![image.png](attachment:image.png)

In [66]:
a = np.arange(3)

In [67]:
a

array([0, 1, 2])

In [69]:
a + 5 #Broadcasting

array([5, 6, 7])

In [71]:
b = np.ones((3,3))

In [72]:
b

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [73]:
a.shape, b.shape

((3,), (3, 3))

In [74]:
a + b

array([[1., 2., 3.],
       [1., 2., 3.],
       [1., 2., 3.]])

In [75]:
a * b

array([[0., 1., 2.],
       [0., 1., 2.],
       [0., 1., 2.]])

In [76]:
# Column vector [[0], [1], [2]]: take the 1-D range and add a trailing axis.
c = np.arange(3)[:, np.newaxis]

In [77]:
c

array([[0],
       [1],
       [2]])

In [78]:
a

array([0, 1, 2])

In [79]:
a + c

array([[0, 1, 2],
       [1, 2, 3],
       [2, 3, 4]])

## Manipulating & Comparing Arrays

### Aggregation
Aggregation = performing the same operation on a number of things

In [80]:
list_number = [1,2,3]

In [81]:
ll = np.array(list_number)

In [82]:
ll

array([1, 2, 3])

In [87]:
sum(ll) #Python sum()

6

In [86]:
np.sum(ll) #Numpy sum()

6

In [89]:
# Create a large NumPy array of 10,000 random floats drawn uniformly from [0, 1).
# (A bare `massive_array[:5]` in the middle of the cell was never displayed,
# because only a cell's last expression is shown, so it has been dropped.)
massive_array = np.random.random(10000)
massive_array.size

10000

In [90]:
# Compare the running time of the two summation functions
%timeit sum(massive_array) #Python built-in function
%timeit np.sum(massive_array) #Numpy built-in function

545 µs ± 45 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
5.85 µs ± 194 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [91]:
np.mean(massive_array) #Average value

0.5005677169878936

In [92]:
np.max(massive_array)

0.9998965690453677

In [93]:
np.min(massive_array)

7.94720007101235e-05

## Standard Deviation & Variance
Độ lệch chuẩn và Phương sai

In [94]:
# Dog heights, built directly as an ndarray (no intermediate list binding).
dog_height = np.array([600, 470, 170, 430, 300])

dog_height.std()  # standard deviation (population, ddof=0)

147.32277488562318

In [95]:
np.var(dog_height) #Variance

21704.0

## Sorting Arrays
By default, `np.sort` uses a quicksort algorithm.

In [96]:
x = np.array([2,1,4,3,5])
np.sort(x)

array([1, 2, 3, 4, 5])

In [98]:
#A related function is argsort, which instead returns the indices of the sorted elements:
np.argsort(x) #Returns index of sorted-value from original arrays

array([1, 0, 3, 2, 4], dtype=int64)

### Sorting along rows or columns
A useful feature of NumPy's sorting algorithms is the ability to sort along specific rows or columns of a multidimensional array using the `axis` argument.

In [99]:
# Seed the global generator so the example matrix is the same on every run.
np.random.seed(42)
# 4x6 matrix of integers drawn from [0, 10).
MatA = np.random.randint(10, size=(4, 6))

In [100]:
MatA

array([[6, 3, 7, 4, 6, 9],
       [2, 6, 7, 4, 3, 7],
       [7, 2, 5, 4, 1, 7],
       [5, 1, 4, 0, 9, 5]])

In [101]:
np.sort(MatA, axis=0)

array([[2, 1, 4, 0, 1, 5],
       [5, 2, 5, 4, 3, 7],
       [6, 3, 7, 4, 6, 7],
       [7, 6, 7, 4, 9, 9]])

In [102]:
np.sort(MatA, axis=1)

array([[3, 4, 6, 6, 7, 9],
       [2, 3, 4, 6, 7, 7],
       [1, 2, 4, 5, 7, 7],
       [0, 1, 4, 5, 5, 9]])

## Linear Algebra
Đại số tuyến tính

In [105]:
A = np.array([[1,2,3],
              [4,5,6],
              [7,8,9]])

In [106]:
B = np.array([[6,5],
              [4,3],
              [2,1]])

In [107]:
# A (3x3) dot B (3x2) -> result has shape (3x2)

In [108]:
A.dot(B) # Dot product (matrix multiplication for 2-D arrays)

array([[20, 14],
       [56, 41],
       [92, 68]])

In [109]:
A@B

array([[20, 14],
       [56, 41],
       [92, 68]])

In [110]:
# B (3x2) dot A (3x3) is not possible because the inner dimensions do not match (2 != 3)
B.T #Transpose to (2x3)
# For A of shape (xA, yA) and B of shape (xB, yB), A.dot(B) requires yA == xB

array([[6, 4, 2],
       [5, 3, 1]])

In [111]:
B.T.dot(A)

array([[36, 48, 60],
       [24, 33, 42]])

### Dot Product Example

In [117]:
# Simulated number of jars sold: 5 days x 3 butter types, each value in [0, 20).
np.random.seed(0)  # fixed seed keeps the example reproducible
sales_amounts = np.random.randint(low=0, high=20, size=(5, 3))

In [118]:
sales_amounts

array([[12, 15,  0],
       [ 3,  3,  7],
       [ 9, 19, 18],
       [ 4,  6, 12],
       [ 1,  6,  7]])

In [119]:
# Create the weekly_sales DataFrame: one row per weekday, one column per butter type.
# pandas is imported as `pd` in the imports cell at the top of the notebook.
# The column labels must stay identical to those used for butter_prices,
# because DataFrame.dot aligns on labels.
weekly_sales = pd.DataFrame(
    sales_amounts,
    index=["Mon", "Tues", "Wed", "Thurs", "Fri"],
    columns=["Almond Butter", "Peanut Butter", "Casnew Butter"],
)

In [120]:
weekly_sales

Unnamed: 0,Almond Butter,Peanut Butter,Casnew Butter
Mon,12,15,0
Tues,3,3,7
Wed,9,19,18
Thurs,4,6,12
Fri,1,6,7


In [124]:
# Create a price array
prices = np.array([10,8,12])
prices.shape # result: (3,) -> a 1-D array of 3 elements (not yet a row or column matrix)

(3,)

In [125]:
butter_prices = pd.DataFrame(prices.reshape(1,3), index=["Price"], columns=["Almond Butter", "Peanut Butter", "Casnew Butter"])
# prices must be reshaped to 1 row x 3 columns to match the three column labels

In [123]:
butter_prices

Unnamed: 0,Almond Butter,Peanut Butter,Casnew Butter
Price,10,8,12


In [126]:
weekly_sales.shape, butter_prices.shape

((5, 3), (1, 3))

In [129]:
total_prices = weekly_sales.dot(butter_prices.T)

In [130]:
total_prices

Unnamed: 0,Price
Mon,240
Tues,138
Wed,458
Thurs,232
Fri,142


In [131]:
# Attach the per-day totals as a new "Total Price" column.
# NOTE(review): this mutates weekly_sales in place. Re-running the earlier
# `weekly_sales.dot(butter_prices.T)` cell after this one will likely fail,
# because the extra column has no matching row in butter_prices.T.
# Restart the kernel and run the notebook top to bottom.
weekly_sales["Total Price"] = total_prices
weekly_sales

Unnamed: 0,Almond Butter,Peanut Butter,Casnew Butter,Total Price
Mon,12,15,0,240
Tues,3,3,7,138
Wed,9,19,18,458
Thurs,4,6,12,232
Fri,1,6,7,142
