## NumPy Basics

In [1]:
from pathlib import Path

import numpy as np

### Introduction to NumPy

In [2]:
# Why NumPy?

# - Python has an inbuilt list data type
# - Inbuilt lists allow heterogeneous data
# - NumPy was built for the use cases where we are dealing with data of homogeneous type
# - Allowing for faster and memory efficient computation
# - NumPy also defines high level syntax for commonly performed computation on these kinds of data


# - Most operations of NumPy are vectorised and employ broadcasting
#   - Vectorisation is the absence of explicit looping while performing operations
#     (these looping operations are implemented behind the scenes using precompiled C code)
#   - The term broadcasting (this is known as recycling in R) describes how NumPy treats arrays with different shapes during arithmetic operations.
#     Subject to certain constraints, the smaller array is “broadcast” across the larger array so that they have compatible shapes

In [3]:
# Most NumPy arrays have some restrictions. For instance:

# - All elements of the array must be of the same type of data.
# - Once created, the total size of the array can’t change.
# - The shape must be “rectangular”, not “jagged”; e.g., each row of a two-dimensional array must have the same number of columns.

# When these conditions are met, NumPy exploits these characteristics to make the array faster, more memory efficient,
# and more convenient to use than less restrictive data structures.

### Initialising Arrays

In [4]:
# One way to initialize an array is using a Python sequence, such as a list. For example:

a = np.array([1, 2, 3, 4, 5, 6])

print(a)
type(a)

[1 2 3 4 5 6]


numpy.ndarray

In [5]:
# 2 dimensional array

b = np.array([[1, 2, 3], [4, 5, 6]])

print(b)
type(b)

[[1 2 3]
 [4 5 6]]


numpy.ndarray

In [6]:
#### IMPORTANT!

# A flat, 1 dimensional array with three elements ...
a = np.array([1, 2, 3])

# ... is not the same thing as a 2 dimensional array holding a single row ...
b = np.array([[1, 2, 3]])

# ... which again differs from a 2 dimensional array holding a single column
c = np.array([[1], [2], [3]])

# The shapes make the difference visible (one shape per line)
print(a.shape, b.shape, c.shape, sep="\n")

(3,)
(1, 3)
(3, 1)


In [13]:
# Zeros

np.zeros(5)

array([0., 0., 0., 0., 0.])

In [14]:
# Ones

np.ones(12)

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [15]:
# Empty (Random values depending on state of the memory)

np.empty(5)

array([0., 0., 0., 0., 0.])

In [16]:
# arange (default starting number 0, default step size = 1)

np.arange(12)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [17]:
# arange (starting number, end number and step size)

np.arange(5, 25, 5)

array([ 5, 10, 15, 20])

In [18]:
# 5 elements spaced linearly between 0 and 10

np.linspace(0, 10, num=5)

array([ 0. ,  2.5,  5. ,  7.5, 10. ])

In [19]:
# specify data type
x = np.ones(3, dtype=np.int64)

print(x)

[1 1 1]


### Structured (Named) arrays

In [202]:
# np.str_ is a string type in numpy, it requires a length to be specified
dtype = [("first_name", np.str_, (16)), ("last_name", "S10"), ("age", np.int32), ("math_score", np.float16), ("physics_score", np.float16)]
dtype

[('first_name', numpy.str_, 16),
 ('last_name', 'S10'),
 ('age', numpy.int32),
 ('math_score', numpy.float16),
 ('physics_score', numpy.float16)]

In [203]:
array_values = [("Chaitanya", "Anand", 34, 60.343, 70.4),
                ("Bob", "Marley", 12, 65.4333, 70.456),
                ("Albert", "Einstein", 70, 100, 100)]
array_values

[('Chaitanya', 'Anand', 34, 60.343, 70.4),
 ('Bob', 'Marley', 12, 65.4333, 70.456),
 ('Albert', 'Einstein', 70, 100, 100)]

In [204]:
a = np.array(array_values, dtype = dtype)
a

array([('Chaitanya', b'Anand', 34,  60.34,  70.4 ),
       ('Bob', b'Marley', 12,  65.44,  70.44),
       ('Albert', b'Einstein', 70, 100.  , 100.  )],
      dtype=[('first_name', '<U16'), ('last_name', 'S10'), ('age', '<i4'), ('math_score', '<f2'), ('physics_score', '<f2')])

In [205]:
a["first_name"]

array(['Chaitanya', 'Bob', 'Albert'], dtype='<U16')

In [206]:
a["age"] > 20

array([ True, False,  True])

In [207]:
a[a["age"] > 20]

array([('Chaitanya', b'Anand', 34,  60.34,  70.4),
       ('Albert', b'Einstein', 70, 100.  , 100. )],
      dtype=[('first_name', '<U16'), ('last_name', 'S10'), ('age', '<i4'), ('math_score', '<f2'), ('physics_score', '<f2')])

In [208]:
a[a["math_score"] > 80]

array([('Albert', b'Einstein', 70, 100., 100.)],
      dtype=[('first_name', '<U16'), ('last_name', 'S10'), ('age', '<i4'), ('math_score', '<f2'), ('physics_score', '<f2')])

In [209]:
# NOTE: This won't work with byte string
a[a["last_name"] == "Anand"]

array([],
      dtype=[('first_name', '<U16'), ('last_name', 'S10'), ('age', '<i4'), ('math_score', '<f2'), ('physics_score', '<f2')])

In [210]:
a[a["last_name"] == b"Anand"]

array([('Chaitanya', b'Anand', 34, 60.34, 70.4)],
      dtype=[('first_name', '<U16'), ('last_name', 'S10'), ('age', '<i4'), ('math_score', '<f2'), ('physics_score', '<f2')])

In [211]:
# First name is not a byte string
a[a["first_name"] == "Chaitanya"]

array([('Chaitanya', b'Anand', 34, 60.34, 70.4)],
      dtype=[('first_name', '<U16'), ('last_name', 'S10'), ('age', '<i4'), ('math_score', '<f2'), ('physics_score', '<f2')])

### Indexing and slicing and dicing

In [262]:
a = np.array([1, 2, 3, 4, 5, 6])
a

array([1, 2, 3, 4, 5, 6])

In [266]:
# 2 dimensional array

b = np.array([[1, 2, 3], [4, 5, 6]])
b

array([[1, 2, 3],
       [4, 5, 6]])

In [267]:
# Indexing starts at 0 and slicing and dicing is possible as follows

# Access a single element
print(a[0])
print(b[0, 0])

# Slice notation
print(a[2:])
print(b[0, 1:])
print(b[1:, 1:])

# Dicing notation
print(a[2:4])



1
1
[3 4 5 6]
[2 3]
[[5 6]]
[3 4]


In [268]:
b = np.array([[-1, -2, -3, -4], [9, 10, 11, 12], [300, 400, 500, 600], [23, 56, 32, 87]])

In [269]:
b

array([[ -1,  -2,  -3,  -4],
       [  9,  10,  11,  12],
       [300, 400, 500, 600],
       [ 23,  56,  32,  87]])

In [273]:
b[1]

array([ 9, 10, 11, 12])

In [275]:
b[:,1]

array([ -2,  10, 400,  56])

In [277]:
b[1:]

array([[  9,  10,  11,  12],
       [300, 400, 500, 600],
       [ 23,  56,  32,  87]])

In [278]:
# same thing
b[1:,:]

array([[  9,  10,  11,  12],
       [300, 400, 500, 600],
       [ 23,  56,  32,  87]])

In [279]:
b[:1]

array([[-1, -2, -3, -4]])

In [280]:
b[0]

array([-1, -2, -3, -4])

In [281]:
b[1]

array([ 9, 10, 11, 12])

In [282]:
b[1, 2]

np.int64(11)

In [283]:
b[1:, 2:]

array([[ 11,  12],
       [500, 600],
       [ 32,  87]])

In [284]:
b[1:3]

array([[  9,  10,  11,  12],
       [300, 400, 500, 600]])

In [285]:
b[:,2:]

array([[ -3,  -4],
       [ 11,  12],
       [500, 600],
       [ 32,  87]])

In [291]:
a

array([1, 2, 3, 4, 5, 6])

In [287]:
a[2]

np.int64(3)

In [289]:
a[2:]

array([3, 4, 5, 6])

In [290]:
a[:2]

array([1, 2])

In [295]:
a[2:5]

array([3, 4, 5])

### Attributes of an array object

In [15]:
 # ndim, shape, size, and dtype

In [16]:
# Number of dimensions in the array (ndim)
# The two arrays are re-created here so that this section does not depend on
# whatever `a` and `b` were last bound to by the indexing cells above
# (there `a` has 6 elements and `b` is 4 x 4, which would not reproduce the
# shapes and sizes printed in this section).
a = np.array([1, 2, 3])      # 1 dimensional array
b = np.array([[1, 2, 3]])    # 2 dimensional array with a single row
print(a.ndim)
print(b.ndim)

1
2


In [17]:
# Shape of the array (length of each dimension)

print(a.shape)
print(b.shape)

(3,)
(1, 3)


In [18]:
# Size of the array (total number of elements)

print(a.size)
print(b.size)

3
3


In [19]:
# Data type of the elements of the array

print(a.dtype)
print(b.dtype)

int64
int64


### Sorting the array (Sorting 1 Dimensional Arrays)

In [3]:
a = np.array([45,345,3563,23,324,5,3234,12,234,2,23,2,21,1])

In [4]:
# Note that the sort function modifies the array itself and does not create a copy
a.sort()

In [5]:
print(a)

[   1    2    2    5   12   21   23   23   45  234  324  345 3234 3563]


In [6]:
a = np.array([45,345,3563,23,324,5,3234,12,234,2,23,2,21,1])
np.sort(a)

array([   1,    2,    2,    5,   12,   21,   23,   23,   45,  234,  324,
        345, 3234, 3563])

In [7]:
# Note that using np.sort instead did not modify the original array
print(a)

[  45  345 3563   23  324    5 3234   12  234    2   23    2   21    1]


### Sorting the array (Sorting 2 Dimensional Arrays)

In [86]:
b = np.array([[-1, 400, -3, 87], [9, 10, 11, 12], [300, -2, 500, 600], [23, 56, 32, -4]])
b

array([[ -1, 400,  -3,  87],
       [  9,  10,  11,  12],
       [300,  -2, 500, 600],
       [ 23,  56,  32,  -4]])

In [87]:
# in place sorting
# by default the last axis is sorted (axis = -1), i.e. for a 2 dimensional array each row gets sorted
b.sort()

In [88]:
b

array([[ -3,  -1,  87, 400],
       [  9,  10,  11,  12],
       [ -2, 300, 500, 600],
       [ -4,  23,  32,  56]])

In [89]:
b = np.array([[-1, 400, -3, 87], [9, 10, 11, 12], [300, -2, 500, 600], [23, 56, 32, -4]])
b

array([[ -1, 400,  -3,  87],
       [  9,  10,  11,  12],
       [300,  -2, 500, 600],
       [ 23,  56,  32,  -4]])

In [90]:
# original array left untouched and new array returned (np.sort())
# again by default each row is sorted (axis = -1, the last axis)
np.sort(b)

array([[ -3,  -1,  87, 400],
       [  9,  10,  11,  12],
       [ -2, 300, 500, 600],
       [ -4,  23,  32,  56]])

In [91]:
b

array([[ -1, 400,  -3,  87],
       [  9,  10,  11,  12],
       [300,  -2, 500, 600],
       [ 23,  56,  32,  -4]])

In [93]:
b = np.array([[-1, 400, -3, 87], [9, 10, 11, 12], [300, -2, 500, 600], [23, 56, 32, -4]])
b

array([[ -1, 400,  -3,  87],
       [  9,  10,  11,  12],
       [300,  -2, 500, 600],
       [ 23,  56,  32,  -4]])

In [94]:
np.sort(b, axis = 0) # axis = 0 each column gets sorted

array([[ -1,  -2,  -3,  -4],
       [  9,  10,  11,  12],
       [ 23,  56,  32,  87],
       [300, 400, 500, 600]])

In [300]:
b = np.array([[-1, 400, -3, 87], [9, 10, 11, 12], [300, -2, 500, 600], [23, 56, 32, -4]])
b

array([[ -1, 400,  -3,  87],
       [  9,  10,  11,  12],
       [300,  -2, 500, 600],
       [ 23,  56,  32,  -4]])

In [332]:
sorted_indices = np.argsort(b)
sorted_indices

array([[2, 0, 3, 1],
       [0, 1, 2, 3],
       [1, 0, 2, 3],
       [3, 0, 2, 1]])

In [343]:
b[0,sorted_indices[0,0]]

np.int64(-3)

In [335]:
sorted_indices = np.argsort(b, axis = 0)
sorted_indices

array([[0, 2, 0, 3],
       [1, 1, 1, 1],
       [3, 3, 3, 0],
       [2, 0, 2, 2]])

In [419]:
b = np.array([[-1, 400, -3, 87],
              [9, 10, 11, 12],
              [300, -2, 500, 600],
              [23, 56, 32, -4],
              [0, 0, 90, 12]
             ]
            )
b

array([[ -1, 400,  -3,  87],
       [  9,  10,  11,  12],
       [300,  -2, 500, 600],
       [ 23,  56,  32,  -4],
       [  0,   0,  90,  12]])

In [432]:
b[:,0]

array([ -1,   9, 300,  23,   0])

In [480]:
# np.lexsort treats the LAST key in the tuple as the primary sort key, so this sorts
# by 2nd column, then by 1st column and then by 0th column
sorted_indices = np.lexsort((b[:,0], b[:,1], b[:,2]))
sorted_indices

array([0, 1, 3, 4, 2])

In [460]:
b[sorted_indices]

array([[ -1, 400,  -3,  87],
       [  9,  10,  11,  12],
       [ 23,  56,  32,  -4],
       [  0,   0,  90,  12],
       [300,  -2, 500, 600]])

In [473]:
# Sort by 0th column, then by 1st column and then by 2nd column
# (np.lexsort uses the last key in the tuple as the primary sort key)
sorted_indices = np.lexsort((b[:,2], b[:,1], b[:,0]))
sorted_indices

array([0, 4, 1, 3, 2])

In [474]:
b[sorted_indices]

array([[ -1, 400,  -3,  87],
       [  0,   0,  90,  12],
       [  9,  10,  11,  12],
       [ 23,  56,  32,  -4],
       [300,  -2, 500, 600]])

In [477]:
# Sort by 0th column, then by 1st column and then by 2nd column
# (np.lexsort uses the last key in the tuple as the primary sort key)
# in descending order
sorted_indices = np.lexsort((b[:,2], b[:,1], b[:,0]))[::-1]
sorted_indices

array([2, 3, 1, 4, 0])

In [478]:
b[sorted_indices]

array([[300,  -2, 500, 600],
       [ 23,  56,  32,  -4],
       [  9,  10,  11,  12],
       [  0,   0,  90,  12],
       [ -1, 400,  -3,  87]])

In [479]:
# np.str_ is a string type in numpy, it requires a length to be specified
dtype = [("first_name", np.str_, (16)), ("last_name", "S10"), ("age", np.int32), ("math_score", np.float16), ("physics_score", np.float16)]
dtype

[('first_name', numpy.str_, 16),
 ('last_name', 'S10'),
 ('age', numpy.int32),
 ('math_score', numpy.float16),
 ('physics_score', numpy.float16)]

In [307]:
array_values = [("Chaitanya", "Anand", 34, 60.343, 70.4),
                ("Bob", "Marley", 12, 65.4333, 70.456),
                ("Bob", "Dylan", 5, 99, 99),
                ("Albert", "Einstein", 70, 100, 100),
                ("Bob", "Dylan", 30, 99, 99),
                ("Albert", "Dylan", 32, 45, 46),]
array_values

[('Chaitanya', 'Anand', 34, 60.343, 70.4),
 ('Bob', 'Marley', 12, 65.4333, 70.456),
 ('Bob', 'Dylan', 5, 99, 99),
 ('Albert', 'Einstein', 70, 100, 100),
 ('Bob', 'Dylan', 30, 99, 99),
 ('Albert', 'Dylan', 32, 45, 46)]

In [308]:
a = np.array(array_values, dtype = dtype)
a

array([('Chaitanya', b'Anand', 34,  60.34,  70.4 ),
       ('Bob', b'Marley', 12,  65.44,  70.44),
       ('Bob', b'Dylan',  5,  99.  ,  99.  ),
       ('Albert', b'Einstein', 70, 100.  , 100.  ),
       ('Bob', b'Dylan', 30,  99.  ,  99.  ),
       ('Albert', b'Dylan', 32,  45.  ,  46.  )],
      dtype=[('first_name', '<U16'), ('last_name', 'S10'), ('age', '<i4'), ('math_score', '<f2'), ('physics_score', '<f2')])

In [309]:
# Note that a structured array is 1 dimensional (each record is a single element), so np.sort orders whole records:
# records are compared field by field in dtype order (first_name, then last_name, then age, ...)
np.sort(a)

array([('Albert', b'Dylan', 32,  45.  ,  46.  ),
       ('Albert', b'Einstein', 70, 100.  , 100.  ),
       ('Bob', b'Dylan',  5,  99.  ,  99.  ),
       ('Bob', b'Dylan', 30,  99.  ,  99.  ),
       ('Bob', b'Marley', 12,  65.44,  70.44),
       ('Chaitanya', b'Anand', 34,  60.34,  70.4 )],
      dtype=[('first_name', '<U16'), ('last_name', 'S10'), ('age', '<i4'), ('math_score', '<f2'), ('physics_score', '<f2')])

In [310]:
# Note that axis = 1 gives an error for structured arrays:
# the array is like a data table with records, but it is 1 dimensional
# (one element per record), so axis 0 is the only axis that exists.
# Catch exactly the AxisError that NumPy raises instead of using a bare
# except, which would also hide unrelated problems such as a NameError.
try:
    np.sort(a, axis = 1)
except np.exceptions.AxisError:
    print("error")

error


In [311]:
a

array([('Chaitanya', b'Anand', 34,  60.34,  70.4 ),
       ('Bob', b'Marley', 12,  65.44,  70.44),
       ('Bob', b'Dylan',  5,  99.  ,  99.  ),
       ('Albert', b'Einstein', 70, 100.  , 100.  ),
       ('Bob', b'Dylan', 30,  99.  ,  99.  ),
       ('Albert', b'Dylan', 32,  45.  ,  46.  )],
      dtype=[('first_name', '<U16'), ('last_name', 'S10'), ('age', '<i4'), ('math_score', '<f2'), ('physics_score', '<f2')])

In [312]:
np.sort(a, order = ["math_score"])

array([('Albert', b'Dylan', 32,  45.  ,  46.  ),
       ('Chaitanya', b'Anand', 34,  60.34,  70.4 ),
       ('Bob', b'Marley', 12,  65.44,  70.44),
       ('Bob', b'Dylan',  5,  99.  ,  99.  ),
       ('Bob', b'Dylan', 30,  99.  ,  99.  ),
       ('Albert', b'Einstein', 70, 100.  , 100.  )],
      dtype=[('first_name', '<U16'), ('last_name', 'S10'), ('age', '<i4'), ('math_score', '<f2'), ('physics_score', '<f2')])

In [331]:
# argsort will return the indices of the original array in their sorted order
# ([::-1] then reverses them, so the index of the highest math score comes first)
math_rank = np.argsort(a, order= ["math_score"])[::-1]
math_rank

array([3, 4, 2, 1, 0, 5])

In [327]:
# [::-1] reverses the order and sorts by descending order
math_rank = np.argsort(a, order= ["math_score"])[::-1]
math_rank

array([3, 4, 2, 1, 0, 5])

In [328]:
# Top rank person
a[math_rank[0]]['first_name']

np.str_('Albert')

In [329]:
# What did the top math scorer score in Physics
a[math_rank[0]]['physics_score']

np.float16(100.0)

In [330]:
# Second rank person
a[math_rank[1]]['first_name']

np.str_('Bob')

### Adding elements

In [237]:
a = np.array([1, 2, 3])
b = np.array([4, 5, 6])

In [238]:
# Join the two 1 dimensional arrays end to end.
# np.concatenate is used (rather than the np.concat alias that only exists in NumPy >= 2.0)
# so that this cell matches the other concatenation cells in this section.
np.concatenate((a, b))

array([1, 2, 3, 4, 5, 6])

In [27]:
print(a)
print(b)

[1 2 3]
[4 5 6]


In [28]:
x = np.array([[1, 2], [3, 4]])
y = np.array([[5, 6]])

np.concatenate((x, y), axis=0)

array([[1, 2],
       [3, 4],
       [5, 6]])

In [29]:
# Concat with multi dimensional arrays

a = np.array([[1, 2, 3], [4, 5, 6]])
b = np.array([[7, 8, 9]]) # <-- Note that we have defined a 2 dimensional array b here. Concatenate will give an error with b = np.array([7, 8, 9])

c = np.array([[8], [9]]) # <-- Also note how a column is initialised vs. a row, which would have been np.array([[8, 9]]). A row would give an error when concatenated to a along axis 1

In [30]:
np.concatenate((a, b), axis=0)

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [31]:
c

array([[8],
       [9]])

In [32]:
np.concatenate((a, c), axis=1)

array([[1, 2, 3, 8],
       [4, 5, 6, 9]])

### Reshaping arrays

In [481]:
a = np.array([1, 2, 3, 4, 5, 6, 7 , 8, 9, 10])

In [482]:
print(a)

[ 1  2  3  4  5  6  7  8  9 10]


In [485]:
# Default is row major ordering
np.reshape(a, shape = (5, 2))

array([[ 1,  2],
       [ 3,  4],
       [ 5,  6],
       [ 7,  8],
       [ 9, 10]])

In [486]:
# read or write in C type order. i.e. row major
np.reshape(a, shape = (5, 2), order = 'C')

array([[ 1,  2],
       [ 3,  4],
       [ 5,  6],
       [ 7,  8],
       [ 9, 10]])

In [487]:
# read or write in Fortran type order. i.e. column major
np.reshape(a, shape = (5, 2), order = 'F')

array([[ 1,  6],
       [ 2,  7],
       [ 3,  8],
       [ 4,  9],
       [ 5, 10]])

In [488]:
b = np.reshape(a, shape = (5, 2))

In [489]:
# The original array is unmodified
print(a)
print(b)

[ 1  2  3  4  5  6  7  8  9 10]
[[ 1  2]
 [ 3  4]
 [ 5  6]
 [ 7  8]
 [ 9 10]]


In [490]:
# Reshape 2 D back into a single row of 10 elements (note that the array remains 2 dimensional after reverting)
np.reshape(b, shape = (1, 10))

array([[ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10]])

In [491]:
# Reshape 2 D into a single column instead of a row
np.reshape(b, shape = (10, 1))

array([[ 1],
       [ 2],
       [ 3],
       [ 4],
       [ 5],
       [ 6],
       [ 7],
       [ 8],
       [ 9],
       [10]])

### Changing the number of dimensions

In [42]:
# here is a 1 dimensional array

a = np.array([1, 2, 3])

In [43]:
a.shape

(3,)

In [44]:
# We can turn this into a 2 dimensional array as follows. Weird syntax!

a[np.newaxis, :]

array([[1, 2, 3]])

In [45]:
# Adding the axis on column

a[:, np.newaxis]

array([[1],
       [2],
       [3]])

In [46]:
# We can also expand the dimensions using the np.expand_dims function
# (axis = 0 adds a leading axis giving a single row, axis = 1 adds a trailing axis giving a single column)

c = np.expand_dims(a, axis = 0)
d = np.expand_dims(a, axis = 1)

print(c)
print(d)

[[1 2 3]]
[[1]
 [2]
 [3]]


### Subsetting the array

In [47]:
# The slicing and dicing functionality are the basic subsetting methods in Numpy

a = np.array([1, 2, 3, 4, 5, 6, 7, 8])

a[4:7]

array([5, 6, 7])

In [48]:
# Negative index

a[-5:]
a[-5:-3]

array([4, 5])

In [49]:
# Index out of bounds

try:
    a[8]
except IndexError:
    print("Index out of bounds error")


try:
    a[-9]
except IndexError:
    print("Index out of bounds error with negative index")

Index out of bounds error
Index out of bounds error with negative index


In [50]:
# A logical statement on the array is applied element wise and a boolean array is generated
# This boolean array can be used to subset another array
less_than_6  = a<6

print(less_than_6)

[ True  True  True  True  True False False False]


In [51]:
# We can also subset based on a condition

a[less_than_6]

# Or just directly use
a[a<6]

array([1, 2, 3, 4, 5])

In [52]:
a[a%2==0]

a[(a%2==0) & (a > 4)]

array([6, 8])

In [53]:
# We can also use the np.nonzero() function for subsetting
# np.nonzero returns the indices of the elements that are non zero
a = np.array([1, 2, 3, 4, 5, 6, 7, 8])

print(np.nonzero(a))

a[np.nonzero(a)]

(array([0, 1, 2, 3, 4, 5, 6, 7]),)


array([1, 2, 3, 4, 5, 6, 7, 8])

In [54]:
# Subsetting using nonzero

a[np.nonzero(a>5)]

array([6, 7, 8])

### Stacking arrays

In [55]:
a = np.array([1, 2, 3, 4, 5, 6])
b = np.array([24, 34, 56, 76, 324, 54])

In [56]:
print(np.hstack((a, b)))
print(np.vstack((a, b)))

[  1   2   3   4   5   6  24  34  56  76 324  54]
[[  1   2   3   4   5   6]
 [ 24  34  56  76 324  54]]


In [57]:
# Note the slight difference when doing the same thing with 2 dimensional arrays
a = np.array([[1, 2, 3, 4, 5, 6]])
b = np.array([[24, 34, 56, 76, 324, 54]])

In [58]:
print(np.hstack((a, b))) # <-- this output is different in terms of its dimensionality
print(np.vstack((a, b))) # <- this output is similar to the case above

[[  1   2   3   4   5   6  24  34  56  76 324  54]]
[[  1   2   3   4   5   6]
 [ 24  34  56  76 324  54]]


### Splitting arrays

In [59]:
a = np.array([[1, 2, 3, 4, 5, 6], [24, 34, 56, 76, 324, 54], [23, 2, 2, 1, 1, 6]])

In [60]:
a

array([[  1,   2,   3,   4,   5,   6],
       [ 24,  34,  56,  76, 324,  54],
       [ 23,   2,   2,   1,   1,   6]])

In [61]:
# Split into n equal parts

print(np.hsplit(a, 2))

print(np.hsplit(a, 3))

# The 6 columns cannot be divided into 4 equal parts, so np.hsplit raises a ValueError.
# Only that error is caught, so that unrelated failures are not silently swallowed.
try:
    np.hsplit(a, 4)
except ValueError:
    print('Not possible to split into 4 equal parts')

[array([[ 1,  2,  3],
       [24, 34, 56],
       [23,  2,  2]]), array([[  4,   5,   6],
       [ 76, 324,  54],
       [  1,   1,   6]])]
[array([[ 1,  2],
       [24, 34],
       [23,  2]]), array([[ 3,  4],
       [56, 76],
       [ 2,  1]]), array([[  5,   6],
       [324,  54],
       [  1,   6]])]
Not possible to split into 4 equal parts


In [62]:
# Same but with v split
np.vsplit(a, 3)

[array([[1, 2, 3, 4, 5, 6]]),
 array([[ 24,  34,  56,  76, 324,  54]]),
 array([[23,  2,  2,  1,  1,  6]])]

In [63]:
# Split at particular columns (before column index 1 and before column index 4)
print(a)
print(np.hsplit(a, (1, 4)))

[[  1   2   3   4   5   6]
 [ 24  34  56  76 324  54]
 [ 23   2   2   1   1   6]]
[array([[ 1],
       [24],
       [23]]), array([[ 2,  3,  4],
       [34, 56, 76],
       [ 2,  2,  1]]), array([[  5,   6],
       [324,  54],
       [  1,   6]])]


In [64]:
# Split at particular rows (before row index 1 and before row index 2)
print(a)
print(np.vsplit(a, (1, 2)))

[[  1   2   3   4   5   6]
 [ 24  34  56  76 324  54]
 [ 23   2   2   1   1   6]]
[array([[1, 2, 3, 4, 5, 6]]), array([[ 24,  34,  56,  76, 324,  54]]), array([[23,  2,  2,  1,  1,  6]])]


### Views and copies

In [65]:
# A view is a new array object that shares its data with the original array. Modifying the view changes the original

a = np.array([1, 3, 4])

b = a.view()

In [66]:
print(a)
print(b)

[1 3 4]
[1 3 4]


In [67]:
b[2] = 200

In [68]:
print(a)
print(b)

[  1   3 200]
[  1   3 200]


In [69]:
# A copy on the other hand is an exact replica. Modifying the replica does not modify the original

a = np.array([1, 3, 4])

b = a.copy()

In [70]:
print(a)
print(b)

[1 3 4]
[1 3 4]


In [71]:
b[2] = 200

In [72]:
print(a)
print(b)

[1 3 4]
[  1   3 200]


### Basic array operations

In [73]:
a = np.array([1, 3, 4])
b = np.array([3, 5, 7])

In [74]:
a + b

array([ 4,  8, 11])

In [75]:
a - b

array([-2, -2, -3])

In [76]:
a * b

array([ 3, 15, 28])

In [77]:
# Element wise exponentiation (each element of a raised to the power of the corresponding element of b)
# NOTE: this is not matrix multiplication
a ** b

array([    1,   243, 16384])

In [78]:
a / b

array([0.33333333, 0.6       , 0.57142857])

### Summarising

In [79]:
a = np.array([1, 3, 4])

In [80]:
print(a.sum())

8


In [81]:
print(a.max())

4


In [82]:
print(a.min())

1


In [83]:
a = np.array([[1, 3, 4], [23,45,67]])

In [84]:
print(a)

print(a.sum())
print(a.sum(axis = 0))
print(a.sum(axis = 1))

[[ 1  3  4]
 [23 45 67]]
143
[24 48 71]
[  8 135]


### Generating random numbers

In [85]:
# Seed the generator so that the random numbers drawn in this section are
# the same on every Restart & Run All (an unseeded generator gives different
# values on each run).
SEED = 42
rng = np.random.default_rng(SEED)

In [86]:
rng.random()

0.2031875875964987

In [87]:
rng.standard_normal(10)

array([-0.88676708, -2.49986395,  2.10836967, -0.00665488, -0.58205241,
        0.30839614, -0.51152656, -0.63360813,  0.89272959, -1.89943904])

In [88]:
rng.integers(low=0, high=10, size=5)

array([3, 4, 1, 1, 0])

In [89]:
rng.integers(low=0, high=10, size=(2, 4))

array([[6, 5, 3, 2],
       [5, 9, 7, 9]])

### Uniques and counts

In [90]:
a = np.array([1,2,34,34,4,6536,1, 12, 12, 12])

In [91]:
# Sorted unique values of the array
print(np.unique(a))

# Count of occurrences of each unique value
print(np.unique(a, return_counts=True))

# First index position of the unique value
print(np.unique(a, return_index=True))

[   1    2    4   12   34 6536]
(array([   1,    2,    4,   12,   34, 6536]), array([2, 1, 1, 3, 2, 1]))
(array([   1,    2,    4,   12,   34, 6536]), array([0, 1, 4, 7, 2, 5]))


### Transposing

In [92]:
a = np.array([[1,2,34,34,4], [6536,1, 12, 12, 12]])

In [93]:
a

array([[   1,    2,   34,   34,    4],
       [6536,    1,   12,   12,   12]])

In [94]:
a.T

array([[   1, 6536],
       [   2,    1],
       [  34,   12],
       [  34,   12],
       [   4,   12]])

In [95]:
a.transpose()

array([[   1, 6536],
       [   2,    1],
       [  34,   12],
       [  34,   12],
       [   4,   12]])

### Reversing

In [96]:
a = np.array([1,2,34,34,4, 6536,1, 12, 12, 12])

In [97]:
np.flip(a)

array([  12,   12,   12,    1, 6536,    4,   34,   34,    2,    1])

In [98]:
a = np.array([[1,2,34,34,4], [6536,1, 12, 12, 12]])

In [99]:
a

array([[   1,    2,   34,   34,    4],
       [6536,    1,   12,   12,   12]])

In [100]:
np.flip(a)

array([[  12,   12,   12,    1, 6536],
       [   4,   34,   34,    2,    1]])

In [101]:
print(a)
# Reverse each column
np.flip(a, axis = 0)

[[   1    2   34   34    4]
 [6536    1   12   12   12]]


array([[6536,    1,   12,   12,   12],
       [   1,    2,   34,   34,    4]])

In [102]:
print(a)
# Reverse each row
np.flip(a, axis = 1)

[[   1    2   34   34    4]
 [6536    1   12   12   12]]


array([[   4,   34,   34,    2,    1],
       [  12,   12,   12,    1, 6536]])

### Flattening the array

In [103]:
x = np.array([[1 , 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])

In [104]:
# Creates a new array. Original unaffected by any changes
x.flatten()

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12])

In [105]:
x

array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12]])

In [106]:
a = x.flatten()

In [107]:
a[0] = 2000

In [108]:
print(x) # <- Original unaffected
print(a)

[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]
[2000    2    3    4    5    6    7    8    9   10   11   12]


In [109]:
# Returns a flattened array that is a view of the original whenever possible (a copy is made only if needed).
# Here it is a view, so any change to it changes the original
x.ravel()

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12])

In [110]:
x

array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12]])

In [111]:
a = x.ravel()

In [112]:
a[0] = 2000

In [113]:
print(x) # <- Original also changes
print(a)

[[2000    2    3    4]
 [   5    6    7    8]
 [   9   10   11   12]]
[2000    2    3    4    5    6    7    8    9   10   11   12]


### Saving and loading NumPy Objects

np.save, np.savez, np.savetxt, np.load, np.loadtxt

You will, at some point, want to save your arrays to disk and load them back without having to re-run the code. Fortunately, there are several ways to save and load objects with NumPy. The ndarray objects can be saved to and loaded from the disk files with loadtxt and savetxt functions that handle normal text files, load and save functions that handle NumPy binary files with a .npy file extension, and a savez function that handles NumPy files with a .npz file extension.

The .npy and .npz files store data, shape, dtype, and other information required to reconstruct the ndarray in a way that allows the array to be correctly retrieved, even when the file is on another machine with different architecture.

In [114]:
a = np.array([1, 2, 3, 4, 5, 6])

In [118]:
# np.save does not create missing directories, so make sure data/ exists
# before writing (otherwise this cell fails on a fresh checkout).
Path('data').mkdir(parents=True, exist_ok=True)
# np.save appends the .npy extension to the given file name
np.save('data/npy_saved_file', a)

In [119]:
b = np.load('data/npy_saved_file.npy')

In [None]:
print(b)

In [120]:
# csvs result in some datatype differences
np.savetxt('data/np_saved_csv.csv', a)

In [121]:
np.loadtxt('data/np_saved_csv.csv')

array([1., 2., 3., 4., 5., 6.])

### *** End of File ***