<h1>Basics of Numpy Arrays</h1>

<h3>We will cover a few categories of basic array manipulations here</h3>
- Attributes of Arrays
- Indexing of Arrays
- Slicing of Arrays
- Reshaping of Arrays
- Joining and splitting of Arrays

<h4>Numpy Array Attributes</h4>

In [1]:
# Determining the size, shape, memory consumption and data types of arrays
# Define three random arrays
import numpy as np
np.random.seed(0) # seed for reproducibility

In [2]:
one_dimensional_array = np.random.randint(10, size=6)
two_dimensional_array = np.random.randint(10, size=(3,4))
three_dimensional_array = np.random.randint(10, size=(3,4,5))

# Print the three arrays
print(one_dimensional_array)
print(two_dimensional_array)
print(three_dimensional_array)

[5 0 3 3 7 9]
[[3 5 2 4]
 [7 6 8 8]
 [1 6 7 7]]
[[[8 1 5 9 8]
  [9 4 3 0 3]
  [5 0 2 3 8]
  [1 3 3 3 7]]

 [[0 1 9 9 0]
  [4 7 3 2 7]
  [2 0 0 4 5]
  [5 6 8 4 1]]

 [[4 9 8 1 1]
  [7 9 9 3 6]
  [7 2 0 3 5]
  [9 4 4 6 4]]]


In [6]:
# Each Array has attributes - ndim - (the number of dimensions of array)
print("one_dimensional_array is ndim ", one_dimensional_array.ndim)
print("two_dimensional_array is ndim ", two_dimensional_array.ndim)
print("three_dimensional_array is ndim ", three_dimensional_array.ndim)

one_dimensional_array is ndim  1
two_dimensional_array is ndim  2
three_dimensional_array is ndim  3


In [5]:
# Each Array has attribute - shape - (the size of each dimension)
print("one_dimensional_array is shape ", one_dimensional_array.shape)
print("two_dimensional_array is shape ", two_dimensional_array.shape)
print("three_dimensional_array is shape ", three_dimensional_array.shape)

one_dimensional_array is shape  (6,)
two_dimensional_array is shape  (3, 4)
three_dimensional_array is shape  (3, 4, 5)


In [7]:
# Each array has attribute - size - (the total number of elements in the array)
print("one_dimensional_array is size ", one_dimensional_array.size)
print("two_dimensional_array is size ", two_dimensional_array.size)
print("three_dimensional_array is size ", three_dimensional_array.size)

one_dimensional_array is size  6
two_dimensional_array is size  12
three_dimensional_array is size  60


In [8]:
# dtype attribute tells the data type of the array
print("one_dimensional_array dtype: ",one_dimensional_array.dtype)
print("two_dimensional_array dtype: ", two_dimensional_array.dtype)
print("three_dimensional_array dtype: ",three_dimensional_array.dtype)

one_dimensional_array dtype:  int64
two_dimensional_array dtype:  int64
three_dimensional_array dtype:  int64


In [9]:
# itemsize attribute lists the size (in bytes) of each array element
print("one_dimensional_array itemsize: ",one_dimensional_array.itemsize, " bytes")
print("two_dimensional_array itemsize: ", two_dimensional_array.itemsize, " bytes")
print("three_dimensional_array itemsize: ",three_dimensional_array.itemsize, " bytes")

one_dimensional_array itemsize:  8  bytes
two_dimensional_array itemsize:  8  bytes
three_dimensional_array itemsize:  8  bytes


In [11]:
# nbytes lists the total size (in bytes) of the array
# nbytes is equal to itemsize times size.
print("one_dimensional_array nbytes: ",one_dimensional_array.nbytes, " bytes")
print("two_dimensional_array nbytes: ", two_dimensional_array.nbytes, " bytes")
print("three_dimensional_array nbytes: ",three_dimensional_array.nbytes, " bytes")

one_dimensional_array nbytes:  48  bytes
two_dimensional_array nbytes:  96  bytes
three_dimensional_array nbytes:  480  bytes


<h4> Numpy Array Indexing: Accessing Single Elements </h4>

In [12]:
# Getting and setting the values of individual array elements. 
# In a one dimensional array, you can access the ith value by specifying the desired index in square brackets.
one_dimensional_array

array([5, 0, 3, 3, 7, 9])

In [13]:
one_dimensional_array[0]

5

In [14]:
one_dimensional_array[4]

7

In [15]:
# To index from the end of the array
one_dimensional_array[-1]

9

In [16]:
one_dimensional_array[-2]

7

In [17]:
# In a multi dimensional array you access items using a comma separated tuple of indices.
two_dimensional_array

array([[3, 5, 2, 4],
       [7, 6, 8, 8],
       [1, 6, 7, 7]])

In [18]:
two_dimensional_array[0,0]

3

In [19]:
two_dimensional_array[2,0]

1

In [20]:
two_dimensional_array[2,-1]

7

In [21]:
# Values can also be modified using any of the index notations shown above.
two_dimensional_array[0,0] = 12
two_dimensional_array

array([[12,  5,  2,  4],
       [ 7,  6,  8,  8],
       [ 1,  6,  7,  7]])

In [22]:
# NumPy arrays have a fixed type, so if you assign a floating-point value into an integer array,
# the value is silently truncated (here 3.14 becomes 3).
one_dimensional_array[0] = 3.14
one_dimensional_array

array([3, 0, 3, 3, 7, 9])

<h4>Array Slicing: Accessing Subarrays</h4>

In [24]:
# Getting and setting smaller subarrays within a larger array
# We can use square brackets to access subarrays with the slice notation, marked with colon : character
# For an array x the syntax is: x[start:stop:step]
# If any of these are unspecified, they default to:
# start = 0, stop = size of the dimension, step = 1
one_dim_x = np.arange(10)
one_dim_x

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [25]:
# First five elements
one_dim_x[:5]

array([0, 1, 2, 3, 4])

In [26]:
# Elements from index 5 onwards (index 5 itself is included)
one_dim_x[5:]

array([5, 6, 7, 8, 9])

In [27]:
# Middle Sub Array
one_dim_x[4:7]

array([4, 5, 6])

In [28]:
# Every other element
one_dim_x[::2]

array([0, 2, 4, 6, 8])

In [31]:
# Every other element starting at index 1
one_dim_x[1::2]

array([1, 3, 5, 7, 9])

In [33]:
# In case the value of step is negative then defaults of start and stop are swapped. 
# This becomes a convenient way to reverse an array
one_dim_x[::-1]  # All elements, reversed

array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])

In [34]:
one_dim_x[5::-2] # Reverse every other from index 5

array([5, 3, 1])

In [35]:
# Multi-dimensional subarray slices work with multiple slices separated by commas. 
two_dimensional_array

array([[12,  5,  2,  4],
       [ 7,  6,  8,  8],
       [ 1,  6,  7,  7]])

In [36]:
# Subarray of the first two rows and the first three columns
two_dimensional_array[:2,:3]

array([[12,  5,  2],
       [ 7,  6,  8]])

In [38]:
# All rows, every other column.
# A bare ":" selects every row regardless of how many rows the array has,
# so the slice does not need to hard-code the row count.
two_dimensional_array[:, ::2]

array([[12,  2],
       [ 7,  8],
       [ 1,  7]])

In [39]:
# Subarray dimensions can be reversed with a negative step: here the column order is reversed while the rows keep their order
two_dimensional_array[::1, ::-1]

array([[ 4,  2,  5, 12],
       [ 8,  8,  6,  7],
       [ 7,  7,  6,  1]])

In [40]:
# Accessing single rows or columns of an array
# This can be done by combining indexing and slicing, using an empty slice marked by a single colon (:)
print(two_dimensional_array[:,0])

[12  7  1]


In [42]:
print(two_dimensional_array[0,:])

# In case of row access the empty slice can be omitted
print(two_dimensional_array[0])

[12  5  2  4]
[12  5  2  4]


In [43]:
# Array slices return views rather than copies of the array data.
# This differs from Python lists, where slices are copies.
print(two_dimensional_array)

[[12  5  2  4]
 [ 7  6  8  8]
 [ 1  6  7  7]]


In [44]:
# Extract 2 x 2 subarray from two_dimensional_array
two_dimensional_array_sub = two_dimensional_array[:2,:2]
print(two_dimensional_array_sub)

[[12  5]
 [ 7  6]]


In [45]:
# Modifying the sub array will change the original array
two_dimensional_array_sub[0,0] = 99
print(two_dimensional_array_sub)

[[99  5]
 [ 7  6]]


In [47]:
# print original array
print(two_dimensional_array)

# This behaviour is useful when we work with large data sets. We can access and process pieces of these datasets
# without the need to copy the underlying data buffer.

[[99  5  2  4]
 [ 7  6  8  8]
 [ 1  6  7  7]]


In [48]:
# Creating copy of arrays using copy() method
two_dimensional_array_sub_copy = two_dimensional_array[:2,:2].copy()
print(two_dimensional_array_sub_copy)

[[99  5]
 [ 7  6]]


In [49]:
# Modifying the copied subarray leaves the original array untouched.
two_dimensional_array_sub_copy[0,0] = 42
print(two_dimensional_array_sub_copy)

[[42  5]
 [ 7  6]]


In [51]:
# Original unchanged array
print(two_dimensional_array)

[[99  5  2  4]
 [ 7  6  8  8]
 [ 1  6  7  7]]


<h4>Reshaping of Arrays</h4>

In [54]:
# Changing the shape of a given array
# Most flexible way is to use reshape()
grid = np.arange(1,10).reshape((3,3))
print(grid)

# Note: For this to work the size of the original array and the size of the reshaped array should match.

[[1 2 3]
 [4 5 6]
 [7 8 9]]


In [55]:
# The same reshaping can also be done by using np.newaxis inside an index expression
x = np.array([1,2,3])

# row vector via reshape method
x.reshape((1,3))

array([[1, 2, 3]])

In [56]:
# row vector via newaxis method
x[np.newaxis,:]

array([[1, 2, 3]])

In [57]:
# Column vector using reshape method
x.reshape((3,1))

array([[1],
       [2],
       [3]])

In [58]:
# Column vector using newaxis method
x[:,np.newaxis]

array([[1],
       [2],
       [3]])

<h4>Array Concatenation and Splitting</h4>

In [59]:
# Combining multiple arrays into one, and splitting one array into many
# Concatenation of Arrays
# Concatenation is done using following routines:
# - np.concatenate
# - np.vstack
# - np.hstack

In [60]:
# np.concatenate takes a tuple or list of arrays as its first argument
x = np.array([1,2,3])
y = np.array([3,2,1])
np.concatenate([x,y])

array([1, 2, 3, 3, 2, 1])

In [61]:
# Multiple arrays can be concatenated at once
z = [99,99,99]
np.concatenate([x,y,z])

array([ 1,  2,  3,  3,  2,  1, 99, 99, 99])

In [62]:
# np.concatenate can also be used for two dimensional arrays
grid = np.array([[1,2,3],
                 [4,5,6]])
# Concatenate along the first axis
np.concatenate([grid,grid])

array([[1, 2, 3],
       [4, 5, 6],
       [1, 2, 3],
       [4, 5, 6]])

In [63]:
# Concatenate along the second axis (axis is zero-indexed, so this is axis=1)
np.concatenate([grid,grid], axis = 1)

array([[1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6]])

In [65]:
# For working with arrays of mixed dimensions we should use np.vstack (vertical stack) 
# and np.hstack(horizontal stack)
x = np.array([1,2,3])
grid = np.array([[9,8,7],
                [6,5,4]])
# vertically stack the arrays
np.vstack([x, grid])

array([[1, 2, 3],
       [9, 8, 7],
       [6, 5, 4]])

In [68]:
# horizontally stack the array
y = np.array([[99],
            [99]])
np.hstack([grid, y])

# np.dstack will stack arrays along the third axis. 

array([[ 9,  8,  7, 99],
       [ 6,  5,  4, 99]])

In [69]:
# Splitting is opposite to concatenation
# It is implemented by the following functions:
# - np.split
# - np.vsplit
# - np.hsplit

In [70]:
# Splitting the array: np.split is given the list of indices at which to cut
x = [1, 2, 3, 99, 99, 3, 2, 1]
split_points = [3, 5]
x1, x2, x3 = np.split(x, split_points)
print(x1, x2, x3)

[1 2 3] [99 99] [3 2 1]


In [71]:
# N Split points lead to N + 1 subarrays
grid = np.arange(16).reshape(4,4)
grid

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [72]:
# Vertical split is done using np.vsplit
upper, lower = np.vsplit(grid, [2])
print(upper)
print(lower)

[[0 1 2 3]
 [4 5 6 7]]
[[ 8  9 10 11]
 [12 13 14 15]]


In [73]:
# Horizontal split is done using np.hsplit
left, right = np.hsplit(grid, [2])
print(left)
print(right)

# np.dsplit will split the array along the third axis.

[[ 0  1]
 [ 4  5]
 [ 8  9]
 [12 13]]
[[ 2  3]
 [ 6  7]
 [10 11]
 [14 15]]
