# NumPy Array Manipulation

In [14]:
import numpy as np

arr = np.random.randint(1, 100, size=(10,5))    # 10x5 array with random integers from 1 to 99
print("Original Array:\n", arr)

# Array Manipulation
# 1. Reshaping
reshaped_arr = arr.reshape(5, 10 )
print("Reshaped Array:\n", reshaped_arr)

# 2. Flattening (converting to 1D)
flattened_arr_row = arr.flatten()        #row wise flattening
print("Flattened Array:\n", flattened_arr_row)

flattened_arr_col = arr.ravel(order='F')   #column wise flattening, F= Column wise, C= Row wise
print("Flattened Array (Column-wise):\n", flattened_arr_col)

# 3. Concatenation (adding arrays together)
# Keep in mind the dimensions when concatenating arrays must match along the specified axis. Like we can not concatenate two arrays of shape (10,5) and (8,3) row wise as the number of colunms do not match. As same for column wise concatenation the number of rows must match.

arr2 = np.random.randint(1, 100, size=(1,5))
concatenated_arr = np.concatenate((arr, arr2), axis=0)  #axis=0 for row-wise concatenation, axis=1 for column-wise
print("Concatenated Array (Row-wise):\n", concatenated_arr)

# 4. Transposing (converting rows to columns and vice versa)
transposed_arr = arr.T
print("Transposed Array:\n", transposed_arr)

# 4. Splitting (dividing an array into multiple sub-arrays)
split_arr = np.array_split(arr, 2, axis=0)  # Splitting row-wise into 2 parts
print("Split Arrays (Row-wise):\n", split_arr)

# split vs array_split
# The main difference between np.split() and np.array_split() is how they handle cases where the array cannot be evenly divided. np.split() requires that the array be divisible into equal parts, otherwise it raises an error. In contrast, np.array_split() allows for unequal splits and will create sub-arrays of different sizes if necessary.

# 5. Adding Rows/Columns
new_row = np.random.randint(1, 100, size=(1, 5))
arr = np.vstack([arr, new_row])
print("Array after Adding Row:\n", arr)

new_col = np.random.randint(1, 100, size=(11, 1))
arr = np.hstack([arr, new_col])
print("Array after Adding Column:\n", arr)

# 5. Deleting Rows/Columns
arr = np.delete(arr, 0, axis=0)  # Deleting first row (array index 0)   0 for row, 1 for column
print("Array after Deleting First Row:\n", arr)

arr = np.delete(arr, -1, axis=1)  # Deleting last column (array index -1) 0 for row, 1 for column
print("Array after Deleting Last Column:\n", arr)

Original Array:
 [[79  5 52 15 78]
 [93 73 83 28 48]
 [19 30 57 36 41]
 [27 78  3 11 48]
 [36 73 82 76  8]
 [20 11 15 13 37]
 [29 18 84 20 74]
 [18 99 79  1 69]
 [47 72 86 32 94]
 [59 81 35 23 64]]
Reshaped Array:
 [[79  5 52 15 78 93 73 83 28 48]
 [19 30 57 36 41 27 78  3 11 48]
 [36 73 82 76  8 20 11 15 13 37]
 [29 18 84 20 74 18 99 79  1 69]
 [47 72 86 32 94 59 81 35 23 64]]
Flattened Array:
 [79  5 52 15 78 93 73 83 28 48 19 30 57 36 41 27 78  3 11 48 36 73 82 76
  8 20 11 15 13 37 29 18 84 20 74 18 99 79  1 69 47 72 86 32 94 59 81 35
 23 64]
Flattened Array (Column-wise):
 [79 93 19 27 36 20 29 18 47 59  5 73 30 78 73 11 18 99 72 81 52 83 57  3
 82 15 84 79 86 35 15 28 36 11 76 13 20  1 32 23 78 48 41 48  8 37 74 69
 94 64]
Concatenated Array (Row-wise):
 [[79  5 52 15 78]
 [93 73 83 28 48]
 [19 30 57 36 41]
 [27 78  3 11 48]
 [36 73 82 76  8]
 [20 11 15 13 37]
 [29 18 84 20 74]
 [18 99 79  1 69]
 [47 72 86 32 94]
 [59 81 35 23 64]
 [38 91 88 88 11]]
Transposed Array:
 [[79 93 19 

# Math Operators(Arithmetic) & Functions

In [None]:
import numpy as np

a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
b = np.array([[9, 8, 7], [6, 5, 4], [3, 2, 1]])

# Arithmetic Operations
sum_arr = a + b     # It can also be done using np.add(a,b)
sub_arr = a - b     # It can also be done using np.subtract(a,b)
mul_arr = a * b     # It can also be done using np.multiply(a,b)
div_arr = a / b     # It can also be done using np.divide(a,b)
rem_arr = a % b     # It can also be done using np.mod(a,b)

print("Sum:\n", sum_arr)
print("Subtraction:\n", sub_arr)
print("Multiplication:\n", mul_arr)
print("Division:\n", div_arr)
print("Remainder:\n", rem_arr)

# Mathematical Functions
# 1. Trigonometric Functions
angles = np.array([0, 30, 45, 60, 90])
radians = np.radians(angles)    # Convert degrees to radians. It can also be done using np.deg2rad(angles)
degrees = np.degrees(radians)  # Convert radians to degrees. It can also be done using np.rad2deg(radians)

sine_values = np.sin(radians)
cosine_values = np.cos(radians)
tangent_values = np.tan(radians)

arc_sine_values = np.arcsin(sine_values)
arc_cosine_values = np.arccos(cosine_values)
arc_tangent_values = np.arctan(tangent_values)


print("Sine Values:\n", sine_values)
print("Cosine Values:\n", cosine_values)
print("Tangent Values:\n", tangent_values)

print("Arc Sine Values:\n", arc_sine_values)
print("Arc Cosine Values:\n", arc_cosine_values)
print("Arc Tangent Values:\n", arc_tangent_values)

# 2. Exponential and Logarithmic Functions
exp_values = np.exp(a)
log_values = np.log(a)          # Natural logarithm
log10_values = np.log10(a)    # Base 10 logarithm
log2_values = np.log2(a)
sqrt_values = np.sqrt(a)

print("Exponential Values:\n", exp_values)
print("Natural Logarithm Values:\n", log_values)
print("Base 10 Logarithm Values:\n", log10_values)
print("Base 2 Logarithm Values:\n", log2_values)
print("Square Root Values:\n", sqrt_values)


# 3. Sum, Products, Differences, and Cumulative Functions
sum_total = np.sum(a)                # Sum of all elements, To get only row wise or column wise sum use axis parameter like np.sum(a, axis=0) for column wise sum and np.sum(a, axis=1) for row wise sum

cum_sum = np.cumsum(a)         # Cumulative sum of elements. 

# The difference between sum and cumsum is that sum returns a single value representing the total sum of all elements in the array, whereas cumsum returns an array of the same shape as the input, where each element at index i is the sum of all elements from index 0 to i in the original array.

difference = np.diff(a)          # Difference between consecutive elements along the specified axis. By default, it is calculated along the last axis (axis=-1).
prod_total = np.prod(a)          # Product of all elements

print("Total Sum:\n", sum_total)
print("Cumulative Sum:\n", cum_sum)
print("Difference between Consecutive Elements:\n", difference)
print("Total Product:\n", prod_total)

Sum:
 [[10 10 10]
 [10 10 10]
 [10 10 10]]
Subtraction:
 [[-8 -6 -4]
 [-2  0  2]
 [ 4  6  8]]
Multiplication:
 [[ 9 16 21]
 [24 25 24]
 [21 16  9]]
Division:
 [[0.11111111 0.25       0.42857143]
 [0.66666667 1.         1.5       ]
 [2.33333333 4.         9.        ]]
Remainder:
 [[1 2 3]
 [4 0 2]
 [1 0 0]]
Sine Values:
 [0.         0.5        0.70710678 0.8660254  1.        ]
Cosine Values:
 [1.00000000e+00 8.66025404e-01 7.07106781e-01 5.00000000e-01
 6.12323400e-17]
Tangent Values:
 [0.00000000e+00 5.77350269e-01 1.00000000e+00 1.73205081e+00
 1.63312394e+16]
Arc Sine Values:
 [0.         0.52359878 0.78539816 1.04719755 1.57079633]
Arc Cosine Values:
 [0.         0.52359878 0.78539816 1.04719755 1.57079633]
Arc Tangent Values:
 [0.         0.52359878 0.78539816 1.04719755 1.57079633]
Exponential Values:
 [[2.71828183e+00 7.38905610e+00 2.00855369e+01]
 [5.45981500e+01 1.48413159e+02 4.03428793e+02]
 [1.09663316e+03 2.98095799e+03 8.10308393e+03]]
Natural Logarithm Values:
 [[0.     

# Broadcasting in NumPy

In [13]:
# Broadcasting : In NumPy, broadcasting refers to the ability of the library to perform arithmetic operations on arrays of different shapes in a way that makes them compatible for element-wise operations. Broadcasting automatically expands the smaller array along the dimensions of the larger array so that they have compatible shapes. This allows for efficient computations without the need to create large intermediate arrays, saving both memory and processing time.
# In-short: Expanding the smaller array to match the shape of the larger array for element-wise( Vectorized ) operations.

# In a nutshell, broadcasting follows these rules:
# 1. If the arrays have a different number of dimensions, the shape of the smaller array is padded with ones on the left side until both shapes have the same length. Here added with ones on the left side means if we have a 1D array of shape (3,) and a 2D array of shape (3,3), the 1D array is treated as having shape (1,3) for the purpose of broadcasting.
# 2. The sizes of the dimensions are compared element-wise from right to left.Here from right to left means we start comparing the dimensions from the last dimension to the first dimension. Two dimensions are considered compatible when:
#    - They are equal, or 
#    - One of them is 1 
# 3. If the sizes of the dimensions are not compatible, a broadcasting error is raised.

# Vector: 1D array
# Matrix: 2D array

import numpy as np
a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])    #3*3 matrix -> shape (3,3)
b = np.array([10, 20, 30])      #1*3 vector -> shape (1,3) (smaller array, so it will be broadcasted to match the shape of a -> shape (3,3))
result = a + b 

# Here , the 1D array b is broadcasted across each row of the 2D array a, converting b to [[10, 20, 30], [10, 20, 30], [10, 20, 30]] before performing the addition.
print("Result of Broadcasting Addition:\n", result)

# But if the b array was [10,20] or [10,20,30,40] it would raise an error as the dimensions are not compatible for broadcasting.

Result of Broadcasting Addition:
 [[11 22 33]
 [14 25 36]
 [17 28 39]]


# Logical & Comparison Operations

In [None]:
#  Comparison Operators : >, <, >=, <=, ==, !=
# This operators compare corresponding elements of two arrays and return a boolean array indicating the result of the comparison for each element.

# Logical Operators : and, or, not  -> In NumPy we use & for and, | for or, ~ for not
# These operators perform element-wise logical operations on boolean arrays, returning a boolean array as the result.
# Note: When using logical operators with NumPy arrays, make sure to use parentheses around the conditions to ensure proper evaluation order.

# all(), any() functions can also be used to check if all or any of the elements in the array satisfy a certain condition. This functions return a single boolean value.

a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
b = np.array([[5, 2, 3], [1, 7, 6], [7, 0, 10]])

greater_than = a > b
less_than = a < b
greater_equal = a >= b
less_equal = a <= b
equal = a == b
not_equal = a != b
logical_and = (a > 2) & (b < 8)
logical_or = (a < 3) | (b > 5)
logical_not = ~(a > 5)
all_greater_than_0 = np.all(a > 0)
any_less_than_0 = np.any(a < 0)


print("Greater Than:\n", greater_than)
print("Less Than:\n", less_than)
print("Greater Than or Equal:\n", greater_equal)
print("Less Than or Equal:\n", less_equal)
print("Equal:\n", equal)
print("Not Equal:\n", not_equal)
print("Logical AND:\n", logical_and)
print("Logical OR:\n", logical_or)
print("Logical NOT:\n", logical_not)
print("All elements greater than 0:", all_greater_than_0)
print("Any element less than 0:", any_less_than_0)

Greater Than:
 [[False False False]
 [ True False False]
 [False  True False]]
Less Than:
 [[ True False False]
 [False  True False]
 [False False  True]]
Greater Than or Equal:
 [[False  True  True]
 [ True False  True]
 [ True  True False]]
Less Than or Equal:
 [[ True  True  True]
 [False  True  True]
 [ True False  True]]
Equal:
 [[False  True  True]
 [False False  True]
 [ True False False]]
Not Equal:
 [[ True False False]
 [ True  True False]
 [False  True  True]]
Logical AND:
 [[False False  True]
 [ True  True  True]
 [ True  True False]]
Logical OR:
 [[ True  True False]
 [False  True  True]
 [ True False  True]]
Logical NOT:
 [[ True  True  True]
 [ True  True False]
 [False False False]]
All elements greater than 0: True
Any element less than 0: False


# Sorting, Searching & Counting

In [None]:
# Sorting: np.sort(), np.argsort()

a = np.array([[3, 1, 2], [6, 4, 5], [9, 7, 8]])
sorted_arr = np.sort(a, axis=1)  # Sort each row in ascending order of elements
argsorted_indices = np.argsort(a, axis=0)  # Indices that would sort each column. Argsort returns the indices(the positions) that would sort an array, rather than the sorted array itself. Why this is useful? Because sometimes you may want to know the order of elements without actually sorting the array, for example, when you want to sort one array based on the values of another array. 

# arr = np.array([30, 10, 20]).sort()          # → array([10, 20, 30])
# np.argsort(arr)        # → array([1, 2, 0])  (indices that would sort the array)
# This means: element at index 1 (10) comes first, then index 2 (20), then index 0 (30).

print("Sorted Array (Row-wise):\n", sorted_arr)
print("Argsorted Indices (Column-wise):\n", argsorted_indices)

# Searching: np.where(), np.argmax(), np.argmin()

search_indices = np.where(a > 5)  # Indices of elements greater than 5 (condition, x(optional), y(optional)): if true then x, otherwise replace with y -> returns indices not elements where condition is true if x and y are not provided.
search_elements = np.where(a > 5, a, 0)  # Elements greater than 5, others replaced with 0
argmax_index = np.argmax(a)       # Index of the maximum element in the flattened array. The max() function returns the maximum value itself, while argmax() returns the index of that maximum value.
argmin_index = np.argmin(a)       # Index of the minimum element in the flattened array. The min() function returns the minimum value itself, while argmin() returns the index of that minimum value.

print("Indices of Elements Greater than 5:\n", search_indices)      # Gives row array and column array of indices. To get the element, use row array[i] and column array[i] where i is the index of the element in the condition satisfying elements.
print("Elements Greater than 5 (others replaced with 0):\n", search_elements)
print("Index of Maximum Element:\n", argmax_index)
print("Index of Minimum Element:\n", argmin_index)

# Counting: np.count_nonzero(), np.unique()

nonzero_count = np.count_nonzero(a)  # Count of non-zero elements (condition can also be provided like np.count_nonzero(a > 5) to count elements greater than 5)
unique_elements = np.unique(a, return_counts=True)        # Unique elements in the array (array, options like return_counts=True to get the count of each unique element or return_index=True to get the indices of the first occurrences of the unique elements)

print("Count of Non-Zero Elements:\n", nonzero_count)
print("Unique Elements:\n", unique_elements)

Sorted Array (Row-wise):
 [[1 2 3]
 [4 5 6]
 [7 8 9]]
Argsorted Indices (Column-wise):
 [[0 0 0]
 [1 1 1]
 [2 2 2]]
Indices of Elements Greater than 5:
 (array([1, 2, 2, 2]), array([0, 0, 1, 2]))
Elements Greater than 5 (others replaced with 0):
 [[0 0 0]
 [6 0 0]
 [9 7 8]]
Index of Maximum Element:
 6
Index of Minimum Element:
 1
Count of Non-Zero Elements:
 9
Unique Elements:
 (array([1, 2, 3, 4, 5, 6, 7, 8, 9]), array([1, 1, 1, 1, 1, 1, 1, 1, 1]))


# Statistical Functions

In [1]:
# Statistical methods: np.mean(), np.median(), np.std(), np.var(), np.min(), np.max()

import numpy as np
data = np.genfromtxt('students_scores.csv', delimiter=',', skip_header=1 )  # csv contains numerical data separated by commas
print("Data:\n", data)

# Statistical methods
mean = np.mean(data, axis=0)
median = np.median(data, axis=0)
std_dev = np.std(data, axis=0)
variance = np.var(data, axis=0)
minimum = np.min(data, axis=0)
maximum = np.max(data, axis=0)

math_marks = data[::, :1].T  # Assuming math scores are in the first column (index 0)
# Then we can also calculate statistical methods for specific columns like math_marks as same as data array.

print("Mean:\n", mean)
print("Median:\n", median)
print("Standard Deviation:\n", std_dev)
print("Variance:\n", variance)
print("Minimum:\n", minimum)
print("Maximum:\n", maximum)

print("Math Marks:\n", math_marks)

# Correlation and Covariance: np.corrcoef(), np.cov()

# Correlation matrix: It measures the strength and direction of the linear relationship between two variables. The values range from -1 to 1, where 1 indicates a perfect positive correlation, -1 indicates a perfect negative correlation, and 0 indicates no correlation.
# Covariance matrix: It measures how much two random variables vary together. A positive covariance indicates that the variables tend to increase or decrease together, while a negative covariance indicates that one variable tends to increase when the other decreases. Unlike correlation, covariance values are not standardized and can take any value.

study_hours = np.array([2,4,5,7,8])  
scores = np.array([65,75,78,88,92])            

prediction = np.array([study_hours, scores])  

correlation_matrix = np.corrcoef(prediction)    
covariance_matrix = np.cov(prediction)

print("Correlation Matrix:\n", correlation_matrix)
print("Covariance Matrix:\n", covariance_matrix)

# Note : Use np.cov() to understand joint variability; use np.corrcoef() to understand how strong and in what direction two variables move together - without being misled by units or scale.

Data:
 [[85. 92. 78.]
 [76. 81. 88.]
 [92. 87. 91.]
 [88. 94. 85.]
 [73. 79. 82.]
 [95. 89. 93.]
 [81. 76. 79.]
 [89. 91. 87.]
 [78. 84. 90.]
 [91. 88. 86.]
 [84. 77. 81.]
 [87. 93. 89.]
 [79. 82. 84.]
 [93. 96. 92.]
 [82. 80. 77.]
 [88. 85. 94.]
 [75. 78. 83.]
 [90. 92. 88.]
 [86. 89. 91.]
 [94. 87. 95.]]
Mean:
 [85.3  86.   86.65]
Median:
 [86.5 87.  87.5]
Standard Deviation:
 [6.43506022 5.95818764 5.26569084]
Variance:
 [41.41   35.5    27.7275]
Minimum:
 [73. 76. 77.]
Maximum:
 [95. 96. 95.]
Math Marks:
 [[85. 76. 92. 88. 73. 95. 81. 89. 78. 91. 84. 87. 79. 93. 82. 88. 75. 90.
  86. 94.]]
Correlation Matrix:
 [[1.         0.99859154]
 [0.99859154 1.        ]]
Covariance Matrix:
 [[  5.7  25.6]
 [ 25.6 115.3]]


# Linear Algebra Operations

In [None]:
# Matrix Operations

a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
b = np.array([[7, 8, 9], [10, 11, 12], [13, 14, 15]])

# Dot Product
dot_product = np.dot(a, b)
print("Dot Product:\n", dot_product)

# Matrix Multiplication
matmul_product = np.matmul(a, b)
print("Matrix Multiplication:\n", matmul_product)

# The difference between np.dot() and np.matmul() is that np.dot() can handle both 1D and 2D arrays, performing dot products for 1D arrays and matrix multiplication for 2D arrays. In contrast, np.matmul() is specifically designed for matrix multiplication and requires at least 2D arrays. Additionally, np.matmul() supports broadcasting for higher-dimensional arrays, while np.dot() does not.

# Trace
trace_a = np.trace(a)
print("Trace of A:\n", trace_a)

# Inverse
inverse_a = np.linalg.inv(a)
print("Inverse of A:\n", inverse_a)

# Determinant
det_a = np.linalg.det(a)
print("Determinant of A:\n", det_a)

# Rank
rank_a = np.linalg.matrix_rank(a)
print("Rank of A:\n", rank_a)

# Eigenvalues and Eigenvectors
# Eigenvalues: Scalars that indicate how much the eigenvectors are stretched or compressed during a linear transformation represented by the matrix.
# Eigenvectors: Non-zero vectors that only change by a scalar factor when a linear transformation is applied to them.
eigenvalues, eigenvectors = np.linalg.eig(a)
print("Eigenvalues of A:\n", eigenvalues)
print("Eigenvectors of A:\n", eigenvectors)

Dot Product:
 [[ 66  72  78]
 [156 171 186]
 [246 270 294]]
Matrix Multiplication:
 [[ 66  72  78]
 [156 171 186]
 [246 270 294]]
Trace of A:
 15
Inverse of A:
 [[ 3.15251974e+15 -6.30503948e+15  3.15251974e+15]
 [-6.30503948e+15  1.26100790e+16 -6.30503948e+15]
 [ 3.15251974e+15 -6.30503948e+15  3.15251974e+15]]
Determinant of A:
 -9.51619735392994e-16
Rank of A:
 2
Eigenvalues of A:
 [ 1.61168440e+01 -1.11684397e+00 -3.38433605e-16]
Eigenvectors of A:
 [[-0.23197069 -0.78583024  0.40824829]
 [-0.52532209 -0.08675134 -0.81649658]
 [-0.8186735   0.61232756  0.40824829]]


In [12]:
tolist = a.tolist()
print("Array converted to List:\n", tolist)

# By this method, we can convert a NumPy array into a standard Python list. Each row of the array becomes a sublist within the main list.


print(np.sin(np.pi/2))

Array converted to List:
 [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
1.0
