In [None]:
# Numpy
# Math Module built on C++
# Everything you are about to see we could theoritically build ourselves but it would be a lot more complex and a lot slower
# What Pandas uses in the background for calculations

In [None]:
# Numpy is usually about 50x - 100x than base python
# Arrays, C++ Style Arrays

In [3]:
# np is the standard abbreviation for numpy
import numpy as np

In [4]:
# Col1 Col2 Col3
# Row1 Row1 Row1
# Row2 Row2 Row2

print(np.__version__)

2.2.1


In [5]:
# Array is our most basic "object" in numpy
arr = np.array([1,2,3,4,5])

arr

array([1, 2, 3, 4, 5])

In [20]:
# We create n-dimensional arrrays

# 0-dimensional arrays or scalars
scalar = np.array(42)

scalar

# 1-dimensional arrays or arrays
arr = np.array([1,2,3,4,5])

arr

# 2-dimensional arrays or Matrix
matrix = np.array([[1,2,3],[3,4,5]])

matrix

# 3-dimensional array or Tensor
tensor = np.array([[[1,2,3],[4,5,6]],[[7,8,9],[10,11,12]]])

tensor

# 20-dimensional array I will not be showing this

# We can always find the dimension of a particular array
print(scalar.ndim, arr.ndim, matrix.ndim, tensor.ndim)

0 1 2 3


In [15]:
# Indexing into our arrays

# In python multi-indexing looks like this
print(matrix[0][1]) # DO NOT DO THIS BAD

# Use Numpy's multi-indexing instead
print(matrix[0, 1]) # GOOD

print(tensor[1, 0, 0])
print(tensor[0, 1, 1])

2
2
7
5


In [27]:
# Data Types
# print(matrix.dtype)

# Numpy has its own datatypes

# i = int - up to int64 other common one is int32 (2^64 - 1) or (2^32 - 1)
# u = unsigned - unsigned ints
# b = bool
# f = floats - float128
# c = complex
# M = datetime
# m = timedelta
# S = Strings
# U = Unicode strings
# O = Object - Often used when more than one datatype is present
# V = Void  - Reserved blocks of data
# 129, 12390, 4709, Null


# We can specify the dtype of our array
arr = np.array([1,2,3,4,5], dtype= 'f')
print(arr.dtype)

# If possible we can also convert/cast between types of existing arrays
new_arr = arr.astype('i')
new_arr

float32


array([1, 2, 3, 4, 5], dtype=int32)

In [28]:
# Shapes of our Matricies can be found
print(arr.shape)
print(matrix.shape)
print(tensor.shape)

(5,)
(2, 3)
(2, 2, 3)


In [33]:
# We can reshape our arrays

arr = np.array([1,2,3,4,5,6,7,8,9,10,11,12])
print(arr.shape)
print(arr)
arr = arr.reshape(2,2,3)
print(arr.shape)
print(arr)

(12,)
[ 1  2  3  4  5  6  7  8  9 10 11 12]
(2, 2, 3)
[[[ 1  2  3]
  [ 4  5  6]]

 [[ 7  8  9]
  [10 11 12]]]


In [40]:
# What if we know how many rows/columns we want but don't know how to make the math work
arr = np.array([1,2,3,4,5,6,7,8,9,10,11,12])

arr = arr.reshape(4,-1)
print(arr)


[[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]]


In [41]:
# Flattening our matrix

arr = arr.reshape(-1)
arr

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12])

In [43]:
# Iterating through arrays

# Python Method
for num in arr:
    print(num)

print()
# Numpy Method
for num in np.nditer(arr):
    print(num)

1
2
3
4
5
6
7
8
9
10
11
12

1
2
3
4
5
6
7
8
9
10
11
12


In [46]:
# Joining Arrays
# Join based off the axis
# axis = 0 is joining based on rows
# axis = 1 is joining based on columns

arr1 = np.array([[1,2,3],[4,5,6]])
arr2 = np.array([[7,8,9],[10,11,12]])

print(arr1)
print(arr2)

print()

print("Joining Array based on rows")
row_arr = np.concatenate((arr1, arr2), axis=0)
print(row_arr)

print()

print("Joining Array based on columns")
col_arr = np.concatenate((arr1, arr2), axis=1)
print(col_arr)

[[1 2 3]
 [4 5 6]]
[[ 7  8  9]
 [10 11 12]]

Joining Array based on rows
[[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]]

Joining Array based on columns
[[ 1  2  3  7  8  9]
 [ 4  5  6 10 11 12]]


In [49]:
# Split arrays into n parts

arr = np.array(["Grass", "Poison"])
split_arr = np.array_split(arr, 2)

split_arr

[array(['Grass'], dtype='<U6'), array(['Poison'], dtype='<U6')]

In [52]:
# Find the positions of certain data

arr = np.array([1,2,3,2,4,5,2,2,3,2])
pos = np.where(arr == 2)

print(pos)

print(arr[1])


(array([1, 3, 6, 7, 9]),)
2


In [54]:
# SearchSorted()

arr = np.array([1,2,3,4,5,6,7,8,9])
x = np.searchsorted(arr, 4)

print(x)

3


In [55]:
# sort your data

arr = np.array([5,2,3,1,5,9,8,11,6])
print(arr)
sorted_arr = np.sort(arr)
print(sorted_arr)

[ 5  2  3  1  5  9  8 11  6]
[ 1  2  3  5  5  6  8  9 11]


In [62]:
arr = np.array([[3,1,2],[9,7,8],[5,6,4]])
sorted_arr = np.sort(arr)
print(arr)
print()
print(sorted_arr)

[[3 1 2]
 [9 7 8]
 [5 6 4]]

[[1 2 3]
 [7 8 9]
 [4 5 6]]


In [64]:
# Filtering
# Filtering is based on boolean values

arr = np.array([1,2,3,4,5,6])
truth_table = [False, True, True, False, True, False]
filtered_array = arr[truth_table]

print(arr)
print()
print(filtered_array)

[1 2 3 4 5 6]

[2 3 5]


In [67]:
# Copies vs Views
# View is essentially a "Shallow Copy"
# Copy is a "Deep Copy"
# 8 GB w/View -> 8.01 GB
# 8 GB w/Copy -> 16 GB File

arr = np.array([1,2,3,4])
view = arr.view()
copy = arr.copy()
print(arr)
print(view)
print(copy)
print()

arr[0] = 15
print(arr)
print(view)
print(copy)
print()

print(view.base)
print(copy.base)

[1 2 3 4]
[1 2 3 4]
[1 2 3 4]

[15  2  3  4]
[15  2  3  4]
[1 2 3 4]

[15  2  3  4]
None


In [68]:
import time

In [74]:
# Python
start = time.time()
list = [i for i in range(1_000_000_000)]
squared = [i**2 for i in list]
stop = time.time()

print(stop-start)

139.3277449607849


In [75]:
# Numpy

start = time.time()
list = np.arange(1_000_000_000)
squared = list**2
stop = time.time()

print(stop-start)

60.49002408981323
