Creating Arrays in NumPy

Create a 1D NumPy Array

In [4]:
import numpy as np
arr = np.array([1, 2, 3, 4, 5])
print(arr)


[1 2 3 4 5]


Create a 2D NumPy Array

In [6]:
arr_2d = np.array([[1, 2, 3], [4, 5, 6]])
print(arr_2d)


[[1 2 3]
 [4 5 6]]


Create an Array with Zeros

In [8]:
zeros = np.zeros((3, 3))  # 3x3 matrix of zeros
print(zeros)


[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]


Create an Array with Ones

In [10]:
ones = np.ones((2, 3))  # 2x3 matrix of ones
print(ones)


[[1. 1. 1.]
 [1. 1. 1.]]


Create an Identity Matrix

In [12]:
identity_matrix = np.eye(3)
print(identity_matrix)


[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]


Create an Array with Random Numbers

In [14]:
random_array = np.random.rand(3, 3)  # 3x3 matrix with random values
print(random_array)


[[0.27018937 0.06253115 0.44658084]
 [0.82412457 0.2201078  0.64127901]
 [0.0499571  0.47113413 0.37378149]]


Create an Array with a Range of Numbers

In [16]:
arr_range = np.arange(1, 11, 2)  # Numbers from 1 to 10 with step size of 2
print(arr_range)


[1 3 5 7 9]


# Checking Array Properties

In [18]:
print(arr.ndim)   # Number of dimensions
print(arr.shape)  # Shape of the array
print(arr.size)   # Total number of elements
print(arr.dtype)  # Data type of elements


1
(5,)
5
int64


# Accessing & Modifying Elements

Indexing (Similar to Lists)

In [20]:
arr = np.array([10, 20, 30, 40, 50])
print(arr[1])  # Access 2nd element (index 1)


20


Slicing Arrays

In [22]:
print(arr[1:4])  # Elements from index 1 to 3


[20 30 40]


# Mathematical Operations on Arrays

Element-wise Arithmetic Operations

In [24]:
a = np.array([1, 2, 3])
b = np.array([4, 5, 6])

print(a + b)  # [5 7 9]
print(a - b)  # [-3 -3 -3]
print(a * b)  # [4 10 18]
print(a / b)  # [0.25 0.4 0.5]
print(a ** 2) # [1 4 9] (Square each element)


[5 7 9]
[-3 -3 -3]
[ 4 10 18]
[0.25 0.4  0.5 ]
[1 4 9]


Aggregate Functions

In [26]:
arr = np.array([10, 20, 30, 40, 50])
print(np.sum(arr))  # Sum of all elements
print(np.mean(arr)) # Mean (average)
print(np.max(arr))  # Maximum value
print(np.min(arr))  # Minimum value
print(np.std(arr))  # Standard deviation
print(np.median(arr))  # Median value


150
30.0
50
10
14.142135623730951
30.0


# Reshaping & Manipulating Arrays

Reshaping Arrays

In [28]:
arr = np.array([1, 2, 3, 4, 5, 6])
reshaped_arr = arr.reshape(2, 3)  # Convert to 2x3 matrix
print(reshaped_arr)


[[1 2 3]
 [4 5 6]]


Flatten a 2D Array to 1D

In [30]:
flattened = reshaped_arr.flatten()
print(flattened)


[1 2 3 4 5 6]


# Matrix Operations with NumPy

Matrix Multiplication

In [32]:
# Two 2x2 integer matrices used for the matrix-product demo.
A = np.array([[1, 2], [3, 4]])
B = np.array([[5, 6], [7, 8]])

# Matrix multiplication. np.dot gives the same result for 2-D arrays, but the
# NumPy documentation recommends the @ operator (np.matmul) for matrix products.
result = A @ B
print(result)


[[19 22]
 [43 50]]


Transpose of a Matrix

In [34]:
print(A.T)  # Transpose of A


[[1 3]
 [2 4]]


# Broadcasting in NumPy

NumPy allows operations between arrays of different shapes through broadcasting.

In [36]:
arr = np.array([1, 2, 3])
print(arr + 10)  # Adds 10 to each element


[11 12 13]


# Handling Missing Values

In [38]:
arr = np.array([1, 2, np.nan, 4, 5])
print(np.isnan(arr))  # Check for NaN values
print(np.nanmean(arr))  # Ignore NaN and compute mean


[False False  True False False]
3.0


# NumPy vs Python Lists (Speed Comparison)

In [40]:
import time

# Number of elements in each operand.
size = 1000000
list1 = list(range(size))
list2 = list(range(size))

# time.perf_counter() is a monotonic, high-resolution clock intended for
# measuring short intervals. time.time() is wall-clock time: it can have coarse
# resolution and can jump if the system clock is adjusted mid-measurement.
start = time.perf_counter()
result = [x + y for x, y in zip(list1, list2)]
print("Python List Time:", time.perf_counter() - start)

# The list-to-array conversion happens outside the timed region, so only the
# element-wise addition itself is measured for NumPy.
arr1 = np.array(list1)
arr2 = np.array(list2)

start = time.perf_counter()
result = arr1 + arr2
print("NumPy Array Time:", time.perf_counter() - start)


Python List Time: 0.047128915786743164
NumPy Array Time: 0.010000944137573242


In [44]:
import pandas as pd  # Import pandas for data manipulation
import os  # Import os for file path operations

# Function to get the full path of a file
def getFilePath(filename):
    """Return ``filename`` joined onto the current working directory.

    The directory is looked up with ``os.getcwd()`` each time the function is
    called, so the result depends on where the process is running at that
    moment.
    """
    return os.path.join(os.getcwd(), filename)

# Read the CSV file. getFilePath joins the name onto the current working
# directory, so 'Test_data.csv' is looked up there.
df = pd.read_csv(getFilePath('Test_data.csv'))

# Print the first 5 rows of the dataset
print(df.head())


   Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \
0            6      148             72             35        0  33.6   
1            1       85             66             29        0  26.6   
2            8      183             64              0        0  23.3   
3            1       89             66             23       94  28.1   
4            0      137             40             35      168  43.1   

   DiabetesPedigreeFunction  Age  Outcome  
0                     0.627   50        1  
1                     0.351   31        0  
2                     0.672   32        1  
3                     0.167   21        0  
4                     2.288   33        1  


In [46]:
pwd

'/Users/hemraj1980/Documents'

Create a Matrix of Zeros

In [50]:
import numpy as np
zeros_matrix = np.zeros((3, 3))
print(zeros_matrix)

[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]


Create a Matrix of Ones

In [58]:
ones_matrix = np.ones((3, 3))
print(ones_matrix)

[[1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]]


This line of code initializes a 3×3 matrix filled with elements having a value of 1 using NumPy’s np.ones() function.

Identity Matrix

In [54]:
identity_matrix = np.eye(3)
print(identity_matrix)

[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]


An identity matrix is a square matrix that has 1s along its main diagonal and 0s everywhere else. Above is an identity matrix of shape 3 x 3.

However, NumPy gives you the flexibility to change the diagonal along which the values have to be 1s. You can move it above the main diagonal with a positive k, or below it with a negative k:

In [60]:
# not an identity matrix
non_identity_matrix = np.eye(3,k=1)
print(non_identity_matrix)

[[0. 1. 0.]
 [0. 0. 1.]
 [0. 0. 0.]]


In [62]:
non_identity_matrix_1 = np.eye(3,k=-2)
print(non_identity_matrix_1)

[[0. 0. 0.]
 [0. 0. 0.]
 [1. 0. 0.]]


Note: A matrix is called the Identity matrix only when the 1s are along the main diagonal and not any other diagonal!

Evenly spaced ndarray

In [64]:
arr1 = np.arange(5)
print(arr1)

[0 1 2 3 4]


In [66]:
arr2 = np.arange(2,10,2)
print(arr2)

[2 4 6 8]


A point to be noted here is that the interval is half-open, [start, end): the end value itself is not included in the array, which is why 10 does not appear in the second output above.

Another similar function is np.linspace(), but instead of step size, it takes in the number of samples that need to be retrieved from the interval. A point to note here is that the last number is included in the values returned unlike in the case of np.arange().

In [68]:
arr3 = np.linspace(0,1,5)
print(arr3)

[0.   0.25 0.5  0.75 1.  ]


The Shape and Reshaping of NumPy Arrays

Dimensions of NumPy arrays
You can easily determine the number of dimensions or axes of a NumPy array using the ndim attribute:

In [70]:
# number of axis
a = np.array([[5,10,15],[20,25,20]])
print('Array :','\n',a)
print('Dimensions :','\n',a.ndim)

Array : 
 [[ 5 10 15]
 [20 25 20]]
Dimensions : 
 2


This array has two dimensions: 2 rows and 3 columns.

Shape of NumPy array

The shape is an attribute of the NumPy array: a tuple that gives the number of elements along each dimension. You can index the returned tuple to get the size along a particular dimension:

In [72]:
a = np.array([[1,2,3],[4,5,6]])
print('Array :','\n',a)
print('Shape :','\n',a.shape)
print('Rows = ',a.shape[0])
print('Columns = ',a.shape[1])

Array : 
 [[1 2 3]
 [4 5 6]]
Shape : 
 (2, 3)
Rows =  2
Columns =  3


Size of NumPy array

You can determine how many values there are in the array using the size attribute. It just multiplies the number of rows by the number of columns in the ndarray:

In [76]:
# size of array
a = np.array([[5,10,15],[20,25,20]])
print('Array :','\n',a)
print('Size of array :',a.size)
print('Manual determination of size of array :',a.shape[0]*a.shape[1])

Array : 
 [[ 5 10 15]
 [20 25 20]]
Size of array : 6
Manual determination of size of array : 6


Shape = 2 * 3
Size = 6
N-dimension = 2

Reshaping a NumPy array

Reshaping a ndarray can be done using the np.reshape() method. It changes the shape of the ndarray without changing the data within the ndarray:

In [80]:
# reshape
a = np.array([3,6,9,12])
b = np.reshape(a,(2,2))
print('Array :','\n',a)
print('Array :','\n',b)

Array : 
 [ 3  6  9 12]
Array : 
 [[ 3  6]
 [ 9 12]]


Here, I reshaped the ndarray from a 1-D to a 2-D ndarray.

While reshaping, if you are unsure about the size of one of the axes, just pass -1 for it. NumPy automatically calculates that size from the total number of elements when it sees a -1:

In [82]:
a = np.array([3,6,9,12,18,24])
print('Three rows :','\n',np.reshape(a,(3,-1)))
print('Three columns :','\n',np.reshape(a,(-1,3)))

Three rows : 
 [[ 3  6]
 [ 9 12]
 [18 24]]
Three columns : 
 [[ 3  6  9]
 [12 18 24]]


Flattening a NumPy array

Sometimes when you have a multidimensional array and want to collapse it to a single-dimensional array, you can either use the flatten() method or the ravel() method:

In [84]:
a = np.ones((2,2))
b = a.flatten()
c = a.ravel()
print('Original shape :', a.shape)
print('Array :','\n', a)
print('Shape after flatten :',b.shape)
print('Array :','\n', b)
print('Shape after ravel :',c.shape)
print('Array :','\n', c)

Original shape : (2, 2)
Array : 
 [[1. 1.]
 [1. 1.]]
Shape after flatten : (4,)
Array : 
 [1. 1. 1. 1.]
Shape after ravel : (4,)
Array : 
 [1. 1. 1. 1.]


But an important difference between flatten() and ravel() is that the former always returns a copy of the original array, while the latter returns a view of the original array whenever possible. This means changes made to the array returned from ravel() will, in that case, also be reflected in the original array, while this will never be the case with flatten(). The cell below modifies the flattened copy b and shows that a is unchanged:

In [86]:
# b was produced by a.flatten() in the previous cell, so it is a copy:
# writing to b leaves a untouched, which the printed array confirms.
b[0] = 0
print(a)

[[1. 1.]
 [1. 1.]]


Transpose of a NumPy array

Another very interesting reshaping method of NumPy is the transpose() method. It takes the input array and swaps the rows with the column values, and the column values with the values of the rows:

In [88]:
# Swap the two axes of a: the (2, 3) input becomes a (3, 2) result.
a = np.array([[1,2,3],
[4,5,6]])
b = np.transpose(a)
print('Original','\n','Shape',a.shape,'\n',a)
# NOTE(review): b is the transpose of a, yet the label below reads
# 'Expand along columns:' -- it looks copied from the expand_dims example and
# probably should say 'Transposed:'. Left as-is to match the recorded output.
print('Expand along columns:','\n','Shape',b.shape,'\n',b)

Original 
 Shape (2, 3) 
 [[1 2 3]
 [4 5 6]]
Expand along columns: 
 Shape (3, 2) 
 [[1 4]
 [2 5]
 [3 6]]


Expanding and Squeezing a NumPy array

Expanding a NumPy array
You can add a new axis to an array using the expand_dims() method by providing the array and the axis along which to expand:

In [90]:
# expand dimensions
a = np.array([1,2,3])
# axis=0 inserts the new axis in front: shape (3,) -> (1, 3)
b = np.expand_dims(a,axis=0)
# axis=1 inserts the new axis at the end: shape (3,) -> (3, 1)
c = np.expand_dims(a,axis=1)
print('Original:','\n','Shape',a.shape,'\n',a)
print('Expand along columns:','\n','Shape',b.shape,'\n',b)
print('Expand along rows:','\n','Shape',c.shape,'\n',c)

Original: 
 Shape (3,) 
 [1 2 3]
Expand along columns: 
 Shape (1, 3) 
 [[1 2 3]]
Expand along rows: 
 Shape (3, 1) 
 [[1]
 [2]
 [3]]


Squeezing a NumPy array

On the other hand, if you instead want to reduce the axis of the array, use the squeeze() method. It removes the axis that has a single entry. This means if you have created a 2 x 2 x 1 matrix, squeeze() will remove the third dimension from the matrix:

In [92]:
# squeeze
a = np.array([[[1,2,3],
[4,5,6]]])
b = np.squeeze(a, axis=0)
print('Original','\n','Shape',a.shape,'\n',a)
print('Squeeze array:','\n','Shape',b.shape,'\n',b)

Original 
 Shape (1, 2, 3) 
 [[[1 2 3]
  [4 5 6]]]
Squeeze array: 
 Shape (2, 3) 
 [[1 2 3]
 [4 5 6]]


Indexing and Slicing of NumPy array

Slicing means retrieving elements from one index to another index. All we have to do is pass the starting and ending points in the index, optionally followed by a step size, like this: [start:end:step].

In [94]:
a = np.array([1,2,3,4,5,6])
print(a[1:5:2])

[2 4]


Notice that the last element did not get considered. This is because slicing includes the start index but excludes the end index.

A way around this is to write the next higher index to the final index value you want to retrieve:

In [96]:
a = np.array([1,2,3,4,5,6])
print(a[1:6:2])

[2 4 6]


If you don’t specify the start or end index, it is taken as 0 or array size, respectively, as default. And the step-size by default is 1.

In [98]:
a = np.array([1,2,3,4,5,6])
print(a[:6:2])
print(a[1::2])
print(a[1:6:])

[1 3 5]
[2 4 6]
[2 3 4 5 6]


Slicing 2-D NumPy arrays

Now, a 2-D array has rows and columns so it can get a little tricky to slice 2-D arrays. But once you understand it, you can slice any dimension array!

Before learning how to slice a 2-D array, let’s have a look at how to retrieve an element from a 2-D array:

In [100]:
a = np.array([[1,2,3],
[4,5,6]])
print(a[0,0])
print(a[1,2])
print(a[1,0])

1
6
4


Here, we provided the row value and column value to identify the element we wanted to extract. While in a 1-D array, we were only providing the column value since there was only 1 row.

So, to slice a 2-D array, you need to mention the slices for both, the row and the column:

In [102]:
a = np.array([[1,2,3],[4,5,6]])
# print first row values; the 0:1 row slice keeps the result 2-D
print('First row values :','\n',a[0:1,:])
# with step-size for columns: every second column of the first row
print('Alternate values from first row:','\n',a[0:1,::2])
# all rows, columns starting at index 1 in steps of 2 (only column 1 here)
print('Second column values :','\n',a[:,1::2])
# first row, columns 1 and 2
print('Arbitrary values :','\n',a[0:1,1:3])

First row values : 
 [[1 2 3]]
Alternate values from first row: 
 [[1 3]]
Second column values : 
 [[2]
 [5]]
Arbitrary values : 
 [[2 3]]


Slicing 3-D NumPy arrays

So far we haven’t seen a 3-D array. Let’s first visualize what a 3-D array looks like:

In [104]:
# Three 3x2 blocks stacked along the first axis.
a = np.array([[[1,2],[3,4],[5,6]],# first 2-D array (index 0 on the first axis)
[[7,8],[9,10],[11,12]],# second 2-D array (index 1 on the first axis)
[[13,14],[15,16],[17,18]]])# third 2-D array (index 2 on the first axis)
# 3-D array
print(a)

[[[ 1  2]
  [ 3  4]
  [ 5  6]]

 [[ 7  8]
  [ 9 10]
  [11 12]]

 [[13 14]
  [15 16]
  [17 18]]]


In addition to the rows and columns, as in a 2-D array, a 3-D array also has a depth axis where it stacks one 2-D array behind the other. So, when you are slicing a 3-D array, you also need to mention which 2-D array you are slicing. This usually comes as the first value in the index:

In [106]:
# A single integer for each of the three axes returns one scalar value.
print('First array, first row, first column value :','\n',a[0,0,0])
# First 2-D array, all rows, column index 1 (the last of its two columns).
print('First array last column :','\n',a[0,:,1])
# 1: selects the second and third 2-D arrays; 0:2 keeps rows 0-1 and columns 0-1.
print('First two rows for second and third arrays :','\n',a[1:,0:2,0:2])

First array, first row, first column value : 
 1
First array last column : 
 [2 4 6]
First two rows for second and third arrays : 
 [[[ 7  8]
  [ 9 10]]

 [[13 14]
  [15 16]]]


If you want the values as a single-dimension array, you can always use the flatten() method to do the job!

In [108]:
print('Printing as a single array :','\n',a[1:,0:2,0:2].flatten())

Printing as a single array : 
 [ 7  8  9 10 13 14 15 16]


Negative slicing of NumPy arrays

An interesting way to slice your array is to use negative slicing. Negative slicing prints elements from the end rather than the beginning. Have a look below:

In [110]:
a = np.array([[1,2,3,4,5],
[6,7,8,9,10]])
print(a[:,-1])

[ 5 10]


In [112]:
print(a[:,-1:-3:-1])

[[ 5  4]
 [10  9]]


Having said that, the basic logic of slicing remains the same, i.e. the end index is never included in the output.

An interesting use of negative slicing is to reverse the original array.

In [114]:
a = np.array([[1,2,3,4,5],
[6,7,8,9,10]])
print('Original array :','\n',a)
print('Reversed array :','\n',a[::-1,::-1])

Original array : 
 [[ 1  2  3  4  5]
 [ 6  7  8  9 10]]
Reversed array : 
 [[10  9  8  7  6]
 [ 5  4  3  2  1]]


You can also use the flip() method to reverse an ndarray.

In [116]:
a = np.array([[1,2,3,4,5],
[6,7,8,9,10]])
print('Original array :','\n',a)
# axis=1 reverses the element order inside each row (the columns are mirrored).
# NOTE(review): that is normally called a horizontal flip, and axis=0 below
# (which swaps the row order) a vertical flip -- the two labels look swapped.
# Left as-is to match the recorded output; confirm the intended wording.
print('Reversed array vertically :','\n',np.flip(a,axis=1))
# axis=0 reverses the order of the rows; each row's contents are unchanged.
print('Reversed array horizontally :','\n',np.flip(a,axis=0))

Original array : 
 [[ 1  2  3  4  5]
 [ 6  7  8  9 10]]
Reversed array vertically : 
 [[ 5  4  3  2  1]
 [10  9  8  7  6]]
Reversed array horizontally : 
 [[ 6  7  8  9 10]
 [ 1  2  3  4  5]]


Stacking and Concatenating NumPy arrays

Stacking ndarrays

You can create a new array by combining existing arrays. This you can do in two ways:

Either combine the arrays vertically (i.e. along the rows) using the vstack() method, thereby increasing the number of rows in the resulting array
Or combine the arrays in a horizontal fashion (i.e. along the columns) using the hstack(), thereby increasing the number of columns in the resultant array

In [118]:
a = np.arange(0,5)
b = np.arange(5,10)
print('Array 1 :','\n',a)
print('Array 2 :','\n',b)
print('Vertical stacking :','\n',np.vstack((a,b)))
print('Horizontal stacking :','\n',np.hstack((a,b)))

Array 1 : 
 [0 1 2 3 4]
Array 2 : 
 [5 6 7 8 9]
Vertical stacking : 
 [[0 1 2 3 4]
 [5 6 7 8 9]]
Horizontal stacking : 
 [0 1 2 3 4 5 6 7 8 9]


A point to note here is that the arrays must have the same size along every axis except the one you are combining along; otherwise you are bound to get an error!

In [120]:
# a has 5 elements and b has 4, so the two 1-D arrays differ in length.
a = np.arange(0,5)
b = np.arange(5,9)
print('Array 1 :','\n',a)
print('Array 2 :','\n',b)
# vstack treats each 1-D array as one row, so the row lengths must match;
# this is the call that raises the ValueError shown in the output.
print('Vertical stacking :','\n',np.vstack((a,b)))
# Not reached because of the exception above. NOTE(review): hstack joins 1-D
# arrays end to end, so on its own it should accept different lengths -- confirm.
print('Horizontal stacking :','\n',np.hstack((a,b)))

Array 1 : 
 [0 1 2 3 4]
Array 2 : 
 [5 6 7 8]


ValueError: all the input array dimensions except for the concatenation axis must match exactly, but along dimension 1, the array at index 0 has size 5 and the array at index 1 has size 4

Another interesting way to combine arrays is using the dstack() method. It combines array elements index by index and stacks them along the depth axis:

In [122]:
a = [[1,2],[3,4]]
b = [[5,6],[7,8]]
c = np.dstack((a,b))
print('Array 1 :','\n',a)
print('Array 2 :','\n',b)
print('Dstack :','\n',c)
print(c.shape)

Array 1 : 
 [[1, 2], [3, 4]]
Array 2 : 
 [[5, 6], [7, 8]]
Dstack : 
 [[[1 5]
  [2 6]]

 [[3 7]
  [4 8]]]
(2, 2, 2)


Concatenating ndarrays

While stacking arrays is one way of combining old arrays to get a new one, you could also use the concatenate() method where the passed arrays are joined along an existing axis:

In [124]:
a = np.arange(0,5).reshape(1,5)
b = np.arange(5,10).reshape(1,5)
print('Array 1 :','\n',a)
print('Array 2 :','\n',b)
print('Concatenate along rows :','\n',np.concatenate((a,b),axis=0))
print('Concatenate along columns :','\n',np.concatenate((a,b),axis=1))

Array 1 : 
 [[0 1 2 3 4]]
Array 2 : 
 [[5 6 7 8 9]]
Concatenate along rows : 
 [[0 1 2 3 4]
 [5 6 7 8 9]]
Concatenate along columns : 
 [[0 1 2 3 4 5 6 7 8 9]]


The drawback of this method is that the original array must have the axis along which you want to combine. Otherwise, get ready to be greeted by an error.

Another very useful function is the append method that adds new elements to the end of a ndarray. This is obviously useful when you already have an existing ndarray but want to add new values to it.

In [126]:
# append values to ndarray
# np.append returns a new array rather than modifying its input; the result is
# not assigned here, so the notebook only displays it and a keeps its two rows.
a = np.array([[1,2],
             [3,4]])
# With axis=0 the values are added as a new row, so they are written as a
# nested list holding one row of two elements.
np.append(a,[[5,6]], axis=0)

array([[1, 2],
       [3, 4],
       [5, 6]])

Broadcasting in NumPy arrays – A class apart!

Broadcasting is one of the best features of ndarrays. It lets you perform arithmetic operations between ndarrays of different sizes, or between an ndarray and a simple number!

Broadcasting essentially stretches the smaller ndarray so that it matches the shape of the larger ndarray:

In [128]:
# a holds 5 values (10, 12, ..., 18) in a 1-D array.
a = np.arange(10,20,2)
# b is a 2x1 column whose entries are both 2.
b = np.array([[2],[2]])
# Shapes (5,) and (2, 1) broadcast to (2, 5): every element of a gets 2 added,
# once per row of b, as the printed 2x5 result shows.
print('Adding two different size arrays :','\n',a+b)
# A plain number is applied to every element of a.
print('Multiplying an ndarray and a number :',a*2)

Adding two different size arrays : 
 [[12 14 16 18 20]
 [12 14 16 18 20]]
Multiplying an ndarray and a number : [20 24 28 32 36]


Maths with NumPy arrays

Here are some of the most important and useful operations that you will need to perform on your NumPy array.

Basic arithmetic operations on NumPy arrays

The basic arithmetic operations can easily be performed on NumPy arrays. The important thing to remember is that these simple arithmetic operator symbols just act as wrappers for NumPy ufuncs.

In [130]:
print('Subtract :',a-5)
print('Multiply :',a*5)
print('Divide :',a/5)
print('Power :',a**2)
print('Remainder :',a%5)

Subtract : [ 5  7  9 11 13]
Multiply : [50 60 70 80 90]
Divide : [2.  2.4 2.8 3.2 3.6]
Power : [100 144 196 256 324]
Remainder : [0 2 4 1 3]


Mean, Median and Standard deviation

To find the mean, standard deviation and median of a NumPy array, use the mean(), std() and median() functions:

In [132]:
a = np.arange(5,15,2)
print('Mean :',np.mean(a))
print('Standard deviation :',np.std(a))
print('Median :',np.median(a))

Mean : 9.0
Standard deviation : 2.8284271247461903
Median : 9.0


Min-Max values and their indexes

Min and Max values in an ndarray can be easily found using the min() and max() methods:

In [134]:
a = np.array([[1,6],
[4,3]])
# axis=0 collapses the rows: one minimum per column -> [1 3]
print('Min :',np.min(a,axis=0))
# axis=1 collapses the columns: one maximum per row -> [6 4]
print('Max :',np.max(a,axis=1))

Min : [1 3]
Max : [6 4]


You can also easily determine the index of the minimum or maximum value in the ndarray along a particular axis using the argmin() and argmax() methods:

In [136]:
a = np.array([[1,6,5],
[4,3,7]])
# axis=0: for each column, the row index holding the smallest value -> [0 1 0]
print('Min :',np.argmin(a,axis=0))
# axis=1: for each row, the column index holding the largest value -> [1 2]
print('Max :',np.argmax(a,axis=1))

Min : [0 1 0]
Max : [1 2]
