According to the NumPy project, "NumPy is the fundamental package for scientific computing with Python."
It's used for working with arrays. NumPy arrays can be processed faster than Python lists.

In [7]:
# the conventional way to import numpy (aliased as np)
import numpy as np
np.__version__

'1.26.3'

In [8]:
# build a 1-D array from a Python list
arr = np.array([1, 2, 3, 4])
arr

array([1, 2, 3, 4])

Arrays have dimensions. 0-D arrays are just single numbers, called scalars. 1-D arrays have 0-D arrays as their elements. 2-D arrays have 1-D arrays as their elements and can be used to represent matrices.

In [9]:
twoD = np.array([[1, 2, 3], [4, 5, 6]])
# two levels of nested brackets give two dimensions (here 2 rows x 3 columns)
twoD

array([[1, 2, 3],
       [4, 5, 6]])

In [10]:
# ndim is the number of dimensions of each array
arr.ndim, twoD.ndim

(1, 2)

In [11]:
# indexing is zero-based, like Python lists
arr[0], arr[1]

(1, 2)

In [12]:
# one index selects a whole row; a (row, column) pair selects a single element
twoD[0], twoD[0, 0]

(array([1, 2, 3]), 1)

In [13]:
# three levels of nested brackets give a 3-D array
arr2 = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])
arr2.ndim

3

In [14]:
# each extra index drops one dimension: whole array, 2-D block, 1-D row, single element
arr2, arr2[1], arr2[1, 1], arr2[1, 1, 1]

(array([[[ 1,  2,  3],
         [ 4,  5,  6]],
 
        [[ 7,  8,  9],
         [10, 11, 12]]]),
 array([[ 7,  8,  9],
        [10, 11, 12]]),
 array([10, 11, 12]),
 11)

In [15]:
arr = np.array([1, 2, 3, 4, 5, 6, 7])
# slicing: the start index (2) is included, the stop index (5) is excluded
arr[2:5]

array([3, 4, 5])

In [16]:
# start:stop:step -- every second element from index 1 up to (not including) index 6
arr[1:6:2]

array([2, 4, 6])

In [17]:
arr = np.array([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]])
# row 1, then every second column from index 1 up to (not including) index 4
arr[1, 1:4:2]

array([7, 9])

In [18]:
# column 3 taken from rows 0 and 1
arr[0:2, 3]

array([4, 9])

In [19]:
# slicing both axes keeps the result 2-D: rows 0 and 1, columns 1 and 3
arr[0:2, 1:4:2]

array([[2, 4],
       [7, 9]])

NumPy has different data types.
i - integer
b - boolean
u - unsigned integer
f - float
c - complex float
m - timedelta
M - datetime
O - object
S - byte string
U - unicode string
V - fixed chunk of memory for other type ( void )

In [20]:
# NumPy infers the dtype from the values; the default integer width
# (int32 in the output shown) depends on the platform and NumPy version
arr = np.array([1, 2, 3])
arr, arr.dtype

(array([1, 2, 3]), dtype('int32'))

In [21]:
# dtype= sets the element type at creation; 'float' resolves to float64
arr = np.array([1, 2, 3], dtype='float')
arr, arr.dtype

(array([1., 2., 3.]), dtype('float64'))

In [22]:
# astype returns a new, converted array and leaves the original untouched
arr2 = arr.astype('int')
# note: the dtype displayed here is that of the original arr (still float64), not of arr2
arr2, arr.dtype

(array([1, 2, 3]), dtype('float64'))

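A copy owns its own data, so changes to the copy do not affect the original (and vice versa). A view shares the original's data, so a change made through either one is visible in the other.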

In [23]:
arr = np.array([1, 2, 3])
arr2 = arr.copy()
# writing to the copy leaves arr unchanged
arr2[0] = 7
arr, arr2

(array([1, 2, 3]), array([7, 2, 3]))

In [24]:
arr3 = arr2.view()
# the view shares arr2's data, so this write shows up in arr2 as well
arr3[1] = 0
arr2, arr3

(array([7, 0, 3]), array([7, 0, 3]))

In [25]:
# base is None when an array owns its data (true for originals and for copies);
# for a view, base is the array whose data it shares
arr.base, arr2.base, arr3.base

(None, None, array([7, 0, 3]))

In [26]:
arr = np.array([1,2,3])
arr2 = np.array([[1,2,3], [4, 5, 6]])
# shape is a tuple holding the length of each dimension
arr.shape, arr2.shape

((3,), (2, 3))

In [27]:
# ndmin=3 pads the shape with leading length-1 dimensions until there are 3 dimensions
arr = np.array([1, 2, 3, 4], ndmin=3)
arr.ndim, arr.shape

(3, (1, 1, 4))

In [28]:
arr = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
# reshape to 4 rows x 3 columns; the total number of elements (12) must stay the same
arr2 = arr.reshape(4, 3)
print((arr, arr.shape, arr2, arr2.shape))

(array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12]), (12,), array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]]), (4, 3))


In [29]:
arr = np.array([1, 2, 3, 4])
arr2 = arr.reshape(2, 2)
# reshape returned a view here: arr2.base is the original 1-D arr
arr2.base

array([1, 2, 3, 4])

In [30]:
arr = np.array([1, 2, 3, 4, 5, 6, 7, 8])
# -1 lets NumPy work out the missing dimension: 8 elements / (2 * 2) = 2
arr2 = arr.reshape(2, 2, -1)
arr2

array([[[1, 2],
        [3, 4]],

       [[5, 6],
        [7, 8]]])

In [31]:
# reshape(-1) flattens the array back to 1-D
arr2.reshape(-1)

array([1, 2, 3, 4, 5, 6, 7, 8])

In [32]:
arr = np.array([
    [[1, 2, 3], [4, 5, 6]],
    [[7, 8, 9], [10, 11, 12]],
])
# nditer visits every scalar element, however many dimensions the array has
for element in np.nditer(arr):
    print(element)

1
2
3
4
5
6
7
8
9
10
11
12


In [33]:
# nditer cannot change the dtype of the elements in place, so the 'buffered' flag
# gives it scratch space to hold the values converted from int to float
for i in np.nditer(arr, flags=['buffered'], op_dtypes='float'):
    print(i)

1.0
2.0
3.0
4.0
5.0
6.0
7.0
8.0
9.0
10.0
11.0
12.0


In [34]:
# arr[:, :, 1] keeps index 1 of the last axis; iterating the 2-D result yields one 1-D row at a time
for i in arr[:, :, 1]:
    print(i)

[2 5]
[ 8 11]


In [35]:
arr = np.array([[1, 2, 3], [4, 5, 6]])
# ndenumerate yields (index tuple, value) pairs, one per element
for position, value in np.ndenumerate(arr):
    print(position, value)

(0, 0) 1
(0, 1) 2
(0, 2) 3
(1, 0) 4
(1, 1) 5
(1, 2) 6


In [36]:
arr1 = np.array([1, 2, 3])
arr2 = np.array([4, 5, 6])
np.concatenate((arr1, arr2))  # arrays are joined end to end along the existing axis

array([1, 2, 3, 4, 5, 6])

In [37]:
# stack joins the arrays along a NEW axis, so two 1-D arrays become one 2-D array
# there are lots of ways to stack
np.stack((arr1, arr2))

array([[1, 2, 3],
       [4, 5, 6]])

In [38]:
# by default axis=0
# axis 1 pairs the inputs up column-wise --> same as np.column_stack for 1-D inputs
#   (dstack produces the same pairs but wrapped in an extra leading axis)
# axis 0 means the inputs are stacked row-wise --> same as vstack for 1-D inputs
np.stack((arr1, arr2), 1)

array([[1, 4],
       [2, 5],
       [3, 6]])

In [39]:
np.hstack((arr1, arr2))  # arrays are stacked next to each other horizontally (for 1-D inputs: same result as concatenate)

array([1, 2, 3, 4, 5, 6])

In [40]:
np.vstack((arr1, arr2))  # arrays are stacked on top of each other vertically (each 1-D input becomes a row)

array([[1, 2, 3],
       [4, 5, 6]])

In [41]:
np.dstack((arr1, arr2))  # arrays are stacked depth-wise (along a third axis), giving a 3-D result

array([[[1, 4],
        [2, 5],
        [3, 6]]])

In [42]:
arr = np.array([1, 2, 3, 4, 5])
# array_split allows uneven pieces: 5 elements in 3 parts gives sizes 2, 2 and 1
np.array_split(arr, 3)

[array([1, 2]), array([3, 4]), array([5])]

In [43]:
arr = np.array([0, 1, 1, 0, 1, 0, 1, 0, 0])
# given only a condition, where returns a tuple of index arrays marking where it is True
np.where(arr == 0)

(array([0, 3, 5, 7, 8], dtype=int64),)

In [44]:
arr = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
# the result holds the indices (not the values) of the even elements
np.where(arr%2 == 0)

(array([1, 3, 5, 7, 9], dtype=int64),)

In [45]:
arr = np.array([5, 6, 7, 8])
# performs binary search
# assumes the array is sorted
# side defaults to 'left'; 'right' is passed here, so the insertion point
# just after the existing 5 (index 1) is returned
np.searchsorted(arr, 5, 'right')

1

In [46]:
arr = np.array([1, 3, 5, 7])
# several values can be searched at once; if the numbers aren't in the array,
# it returns where they would be inserted to maintain the order
np.searchsorted(arr, (2, 6))

array([1, 3], dtype=int64)

In [47]:
arr = np.array([3, 2, 0, 1])
# np.sort returns a sorted copy; arr itself is not modified
np.sort(arr)

array([0, 1, 2, 3])

In [48]:
arr = np.array([[0, 2, 1], [5, 4, 3]])
# for a 2-D array each row is sorted independently (sorting runs along the last axis by default)
np.sort(arr)

array([[0, 1, 2],
       [3, 4, 5]])

In [49]:
arr = np.array([1, 2, 3, 4])
boolArr = np.array([True, True, False, True])
# indexing with a boolean array keeps only the elements where it is True
arr[boolArr]

array([1, 2, 4])

In [50]:
from numpy import random
arr = random.randint(100, size=5)  # 5 random integers in [0, 100)
# Build the boolean mask element by element, as a plain Python list.
# It is named `mask` rather than `filter` so the built-in filter() is not shadowed.
mask = [value > 50 for value in arr]
arr, mask, arr[mask]

(array([73, 57, 80, 95, 30]),
 [True, True, True, True, False],
 array([73, 57, 80, 95]))

In [51]:
arr = random.randint(100, size=4)  # 4 random integers in [0, 100)
# A vectorised comparison builds the whole boolean mask at once -- no loop needed.
# It is named `mask` rather than `filter` so the built-in filter() is not shadowed.
mask = arr > 49
arr, mask, arr[mask]

(array([89, 33, 88,  9]), array([ True, False,  True, False]), array([89, 88]))

In [52]:
import torch
arr = np.array([1, 2, 3])
# from_numpy keeps the array's dtype (int32 in the output shown) and shares memory with arr
torch.from_numpy(arr)

tensor([1, 2, 3], dtype=torch.int32)

In [53]:
t = torch.tensor([1, 2, 3])
# numpy() is called as a method on the tensor rather than through the Tensor class;
# for a CPU tensor the returned array shares memory with t
t.numpy()

array([1, 2, 3], dtype=int64)

"pandas is a fast, powerful, flexible and easy to use open source data analysis and manipulation tool,
built on top of the Python programming language."
pandas is a Python package that is easily installed with pip. It works well with NumPy and is used for data analysis.

In [58]:
import pandas as pd
a = np.array([1, 2, 3])
# a Series is a labelled 1-D column; here it keeps the array's dtype
pd.Series(a)

0    1
1    2
2    3
dtype: int32

In [59]:
# by default the index is 0, 1, 2, etc.; the second argument supplies custom labels,
# and values can then be looked up by label
var = pd.Series(a, ['x', 'y', 'z'])
var, var['x']

(x    1
 y    2
 z    3
 dtype: int32,
 1)

In [63]:
numbers = {'num1': 9, 'num2': 42, 'num3': 162}
# when built from a dict, the keys become the index labels
pd.Series(numbers)

num1      9
num2     42
num3    162
dtype: int64

In [64]:
# passing an index together with a dict keeps only the listed keys
pd.Series(numbers, ['num1', 'num3'])

num1      9
num3    162
dtype: int64

In [70]:
data = {'x': [1, 2, 3],
        'y': [1, 4, 9]}
# each dict key becomes a column; the rows get the default 0, 1, 2 index
frame = pd.DataFrame(data)
# loc (short for "locate") selects rows by index label -- it is used in the following cells
frame

Unnamed: 0,x,y
0,1,1
1,2,4
2,3,9


In [71]:
# a single label returns that row as a Series
frame.loc[1]

x    2
y    4
Name: 1, dtype: int64

In [73]:
# a list of labels returns those rows as a DataFrame
frame.loc[[0, 1]]

Unnamed: 0,x,y
0,1,1
1,2,4


In [103]:
giants = {
    'Devin Singletary': {'Carries': 42, 'Yards': 197, 'Touchdowns': 2, 'Fumbles': 2},
    'Daniel Jones': {'Carries': 19, 'Yards': 67, 'Touchdowns': 0, 'Fumbles': 1},
}
# with a dict of dicts, the outer keys (players) become the columns
# and the inner keys (stats) become the row index
data = pd.DataFrame(giants)
# head(n) returns the first n rows; by default n is 5
data.head(2)

Unnamed: 0,Devin Singletary,Daniel Jones
Carries,42,19
Yards,197,67


In [98]:
# tail(n) returns the last n rows
data.tail(2)

Unnamed: 0,Devin Singletary,Daniel Jones
Touchdowns,2,0
Fumbles,2,1


In [104]:
# describe() summarises each column. NOTE(review): the columns here are players, so each
# summary mixes carries, yards, touchdowns and fumbles together; describing the transposed
# frame (data.T) would give per-stat summaries instead
data.describe()

Unnamed: 0,Devin Singletary,Daniel Jones
count,4.0,4.0
mean,60.75,21.75
std,92.769877,31.404617
min,2.0,0.0
25%,2.0,0.75
50%,22.0,10.0
75%,80.75,31.0
max,197.0,67.0
