In [1]:
# NumPy
# NumPy is what runs in the background of Pandas
# NumPy's core is written mostly in C
# We could potentially write everything we are about to use ourselves
# But that would be incredibly inefficient and take a lot of time

In [2]:
# NumPy is a lot more efficient than plain Python for numeric work
# NumPy is often quoted as ~50x - 100x faster than plain Python loops
# Arrays (like C/C++ arrays) = contiguous memory, optimized searches
# Lists in Python can contain multiple types, and are not true "arrays"
# NumPy arrays hold a single data type, stored in an ndarray (n-dimensional array)

In [4]:
# Let's start using it
# "np" is the alias every later cell uses to reach NumPy
import numpy as np

# Show which NumPy version produced the outputs recorded in this notebook
print(np.__version__)

2.1.3


In [35]:
# Creating an array
# Build the same five values twice: once as a NumPy array, once as a Python list
arr = np.array([1, 2, 3, 4, 5])
# Named py_list rather than `list` so the built-in list type is not shadowed
py_list = [1, 2, 3, 4, 5]
print(type(arr))

# A printed NumPy array has no commas between its values; a Python list does

print(arr)
print(py_list)

<class 'numpy.ndarray'>
[1 2 3 4 5]
[1, 2, 3, 4, 5]


In [36]:
# Dimensions - NumPy arrays are n-dimensional (ndarray)
# One example each, from 0-D up to 3-D

# 0-D array AKA a scalar
zero_arr = np.array(42)

# 1-D array AKA a vector
one_arr = np.array([1, 2, 3, 4, 5])

# 2-D array AKA a matrix
two_arr = np.array([[1, 2, 3],
                    [4, 5, 6]])

# 3-D array AKA a tensor
three_arr = np.array([
    [[1, 2, 3], [4, 5, 6]],
    [[7, 8, 9], [10, 11, 12]],
])

# Print every example, lowest dimension first
for example in (zero_arr, one_arr, two_arr, three_arr):
    print(example)

# .ndim reports how many axes each array has
print("\nDimentsions")
print(*(a.ndim for a in (zero_arr, one_arr, two_arr, three_arr)))


42
[1 2 3 4 5]
[[1 2 3]
 [4 5 6]]
[[[ 1  2  3]
  [ 4  5  6]]

 [[ 7  8  9]
  [10 11 12]]]

Dimentsions
0 1 2 3


In [37]:
# Indexing into our nd arrays

# Chained Python-style indexing works, but it first pulls out row 0 as its
# own array and only then indexes into it - prefer the NumPy form below
row = two_arr[0]
print(row[1])

# NumPy indexing: a single bracket with one index per axis -> [row, col]
print(two_arr[0, 1])

# Same idea with three axes: one index for each of them
print(three_arr[1, 1, 2])


2
2
12


In [43]:
# A NumPy array holds exactly one data type (its dtype)

# NumPy has its own data types, identified here by single-character codes:

# i - signed integers (up to int64)
# b - boolean
# u - unsigned integers
# f - floating point (float128 only on platforms that provide it)
# c - complex floating point
# m - timedelta
# M - datetime
# O - Python object
# S - byte string
# U - unicode string
# V - void - a fixed chunk of memory that is reserved
#
# NOTE(review): this list matches the `dtype.kind` codes. When passed as a
# dtype *argument*, 'b' means a signed byte (int8) and '?' means boolean -
# confirm against the NumPy dtype documentation before relying on 'b'.

# dtype of the integer array created earlier in the notebook
print(arr.dtype)

# 'f' requests single-precision floats at creation time ...
arr2 = np.array([1, 2, 3, 4, 5, 6], dtype='f')
# ... and astype() converts an existing array into a new array of another type
new_arr = arr2.astype('S')

for converted in (arr2, new_arr):
    print(converted.dtype)

int64
float32
|S32


In [47]:
# Checking the shape of our arrays
# .shape is a tuple holding the length of every axis

for current in (arr, two_arr, three_arr):
    print(current.shape)

(5,)
(2, 3)
(2, 2, 3)


In [51]:
# Reshape array
# Start from the values 1 through 12 in a flat 1-D array
reShapedArr = np.arange(1, 13)
print(reShapedArr)
# Target shape is (rows, cols); rows * cols must equal the 12 elements we have
new_arr_2 = reShapedArr.reshape(4, 3)
print(new_arr_2)

[ 1  2  3  4  5  6  7  8  9 10 11 12]
[[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]]


In [52]:
# When one dimension is unknown, pass -1 and NumPy works it out from the size
rows = 3
new_arr_3 = reShapedArr.reshape((rows, -1))
print(new_arr_3)

[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]


In [53]:
# Flatten our array to get back to a 1-D array
# ravel() is the shorthand spelling of reshape(-1)
reShapedArr = new_arr_3.ravel()

print(reShapedArr)



[ 1  2  3  4  5  6  7  8  9 10 11 12]


In [56]:
# Iterating through arrays
# np.nditer hands back the elements one at a time, so a single loop is
# enough even though three_arr has three dimensions
for element in np.nditer(three_arr):
    print(element)

1
2
3
4
5
6
7
8
9
10
11
12


In [61]:
# Joining arrays
# np.concatenate joins arrays along an existing axis
# axis=0 appends rows (one below the other), axis=1 appends columns (side by side)

arr1 = np.array([[1, 2], [3, 4]])
arr2 = np.array([[5, 6], [7, 8]])
pair = (arr1, arr2)

arr = np.concatenate(pair, axis=0)
arrBasedCol = np.concatenate(pair, axis=1)

print(arr)
print(arrBasedCol)

[[1 2]
 [3 4]
 [5 6]
 [7 8]]
[[1 2 5 6]
 [3 4 7 8]]


In [65]:
# Stacking
# stack(), hstack(), dstack()
arr1, arr2 = np.array([1, 2, 3]), np.array([4, 5, 6])
pair = (arr1, arr2)

# stack: each input becomes one row of the 2-D result
arr = np.stack(pair)
print(arr)

# hstack: the 1-D inputs are joined end to end into one longer 1-D array
arrHrizontal = np.hstack(pair)
print(arrHrizontal)

# dstack: matching elements are paired up along a third (depth) axis
arrDepth = np.dstack(pair)
print(arrDepth)

[[1 2 3]
 [4 5 6]]
[1 2 3 4 5 6]
[[[1 4]
  [2 5]
  [3 6]]]


In [66]:
# Split an array into n parts
# array_split copes with uneven splits: the parts need not be the same length

arr = np.array([1, 2, 3, 4, 5, 6])
new_arr = np.array_split(arr, indices_or_sections=4)
# The result is a plain Python list of arrays
print(new_arr)

[array([1, 2]), array([3, 4]), array([5]), array([6])]


In [76]:
# Searching our arrays is done with the where() function

arr = np.array([1, 2, 3, 4, 5, 6, 4, 4])
# Element-wise comparison gives a boolean array: True wherever the value is 4
is_four = arr == 4
# where() turns that into the indexes of the True entries (a tuple, one array per axis)
x = np.where(is_four)

print(x)

(array([3, 6, 7]),)


In [77]:
# searchsorted()
# Binary search: returns the index at which a value would have to be inserted
# to keep the array in order. It does NOT sort for us - the input must already
# be in ascending order, otherwise the returned index is meaningless.

arr = np.array([1, 2, 3, 4, 5, 6, 4, 4])
sorted_arr = np.sort(arr)  # [1 2 3 4 4 4 5 6]
# side="right" -> insertion point AFTER any existing 6s. 6 is the largest
# value, so that point is the very end of the array (index 8 == len(arr)).
# The default side="left" would instead give 7, the position of the 6 itself.
x = np.searchsorted(sorted_arr, 6, side="right")
print(x)

8


In [78]:
# Sorting arrays
# Python's own sorting still works,
# or use np.sort(), which leaves the input untouched and returns the sorted result

arr = np.array([[5, 2, 1], [6, 3, 9]])

# On a 2-D array the last axis is sorted, i.e. every row on its own
sorted_rows = np.sort(arr, axis=-1)
print(sorted_rows)

[[1 2 5]
 [3 6 9]]


In [80]:
# Filtering our arrays
# Either filter on a condition,
# or index with a boolean mask, as done here

arr = np.array([40, 41, 42, 430])

# One flag per element: True keeps the value at that index, False drops it
x = [False, True, False, True]

mask = np.array(x)
filtered_arr = arr[mask]

print(filtered_arr)

[ 41 430]


In [84]:
# Copies vs Views
# With Big Data we must think carefully about what we copy
# A view shares the original's data (much like a reference in JavaScript);
# a copy owns separate data of its own

arr = np.array([1, 2, 3, 4])
view, copy = arr.view(), arr.copy()

# Change the original AFTER taking both: the view follows, the copy does not
arr[0] = 15
for snapshot in (arr, view, copy):
    print(snapshot)

# .base is the array a view borrows its data from; for the copy it is None
# because the copy does not reference any other array
for derived in (view, copy):
    print(derived.base)

[15  2  3  4]
[15  2  3  4]
[1 2 3 4]
[15  2  3  4]
None


In [85]:
import time

In [92]:
# Benchmark: square one million numbers using a plain Python list
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# short durations; time.time() is wall-clock time and can jump if the system
# clock is adjusted.
start = time.perf_counter()
# Named `numbers` rather than `list` so the built-in list type is not shadowed
numbers = [i for i in range(1_000_000)]
squared = [x**2 for x in numbers]
stop = time.perf_counter()

# Elapsed time in seconds
print(stop - start)

0.1332719326019287


In [94]:
# Benchmark: the same squaring done with a NumPy array
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# short durations, which makes it a better fit here than time.time()
start = time.perf_counter()
arr = np.arange(1_000_000)
# One vectorised operation over the whole array - no Python-level loop
squared = arr**2
stop = time.perf_counter()

# Elapsed time in seconds
print(stop - start)

0.0044667720794677734
