In [None]:
import numpy as np
import time

In [None]:
# Converting a Python list to a NumPy array
python_list = [1, 2, 3, 4, 5]
array = np.array(python_list)  # np.array copies the list's values into a new ndarray


## Python Lists vs NumPy Arrays 
- NumPy Arrays are faster (optimized in C)
- NumPy stores data in contiguous memory blocks
- Uses less memory (optimized storage)
- Built in Mathematical Functions 
- Supports direct vectorized operations


In [None]:
# Execution Time Performance Comparison
#
# Squares one million integers twice: once with a Python list comprehension
# and once with a vectorized NumPy expression, printing the elapsed time of each.
# time.perf_counter() is used because it is a monotonic, high-resolution clock
# meant for measuring short durations; time.time() is a wall clock that can
# jump if the system time is adjusted.

size = 1_000_000

# Python List
py_list = list(range(size))  # [0, 1, 2, ..., 999_999]
start = time.perf_counter()  # Timestamp just before the work starts
sq_list = [x ** 2 for x in py_list]  # Explicit Python-level loop
end = time.perf_counter()  # Timestamp right after the squares are computed
print(f"Python List Time = {end - start} seconds")

# NumPy Array
# The list -> array conversion stays outside the timed region so only the
# squaring itself is compared. dtype is pinned to int64 because the largest
# square (999_999 ** 2) does not fit in 32 bits and NumPy integer arithmetic
# wraps around silently on overflow.
np_array = np.array(py_list, dtype=np.int64)
start = time.perf_counter()
sq_array = np_array ** 2  # Vectorized: the loop runs in compiled code
end = time.perf_counter()
print(f"NumPy Array Time = {end - start} seconds")


In [None]:
# Memory Occupied Comparison

import sys

# sys.getsizeof(py_list) measures only the list object itself (its header plus
# the array of pointers to the elements); it does not include the integer
# objects the list refers to. The total footprint is therefore the container
# size PLUS the size of every element, not container size times length.
# This is an estimate: the interpreter may share some integer objects.
list_container_bytes = sys.getsizeof(py_list)
list_elements_bytes = sum(sys.getsizeof(item) for item in py_list)
print(f"Python list size = {list_container_bytes + list_elements_bytes} bytes")

# ndarray.nbytes is the size of the raw data buffer: itemsize * number of elements
print(f"NumPy Array size = {np_array.nbytes} bytes")

In [None]:
# Create NumPy Arrays - From lists
arr1 = np.array([1, 2, 3, 4, 5])
print(arr1, type(arr1), arr1.shape)  # shape holds the length of each dimension, here (5,)

# A list mixing numbers and text: an ndarray has one dtype for all elements,
# so the numbers are stored as strings too (see the printed dtype).
arr2 = np.array([1, 2, 3, 4, 5, "Adi"])
print(arr2, type(arr2), arr2.dtype, arr2.shape)  # type(...) must be called with parentheses

# 2D Arrays - Matrix
arr2D = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]])
print(arr2D, arr2D.shape)  # (4, 3): 4 rows x 3 columns

In [None]:
# Create NumPy Arrays from Scratch
# Each constructor below builds an array directly from a shape or a range,
# without going through a Python list first.

grid_shape = (2, 3)

# np.zeros: every element is 0 (floating point unless a dtype is given)
arr1 = np.zeros(grid_shape)
print(f"arr1: \n {arr1}, {arr1.shape}")

# Same shape, but stored as 64-bit integers
arr2 = np.zeros(grid_shape, dtype=np.int64)
print(f"arr2: \n {arr2}, {arr2.shape}")

# np.ones: every element is 1
arr3 = np.ones(grid_shape, dtype=np.int64)
print(f"arr3: \n {arr3}, {arr3.shape}")

# np.full: every element is the given fill value
arr4 = np.full(shape=(3, 4), fill_value=100)
print(f"arr4: \n {arr4}")

# np.eye: identity matrix (1s on the main diagonal, 0s elsewhere)
arr5 = np.eye(5, dtype=np.int64)
print(f"arr5: \n {arr5}")

# np.arange: like range() - start is included, stop is excluded -> 1..9
arr6 = np.arange(1, 10)
print(f"arr6: \n {arr6}")

# np.linspace: 4 evenly spaced values from 1 to 100, both endpoints included
arr7 = np.linspace(1, 100, num=4)
print(f"arr7: \n {arr7}")

In [None]:
# NumPy Array Properties
rows = [[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]]
arr = np.array(rows)

# Printed one per line, in this order:
#   shape -> length of each dimension, (2, 5)
#   size  -> total number of elements
#   dtype -> element data type
#   ndim  -> number of dimensions
print(arr.shape, arr.size, arr.dtype, arr.ndim, sep="\n")

# Type Casting
# astype returns a NEW array with the requested dtype; arr itself is unchanged
float_arr = arr.astype("float64")
print(float_arr, float_arr.dtype)

---

## Operations on NumPy Arrays
- Reshaping
- Indexing (1D and 2D Array)
- Fancy and Boolean Indexing
- Slicing

In [None]:
# Reshaping - Change the dimensions
# The total number of elements must stay the same: 2 * 3 == 3 * 2
arr = np.array([[1, 2, 3], [4, 5, 6]])
print(arr, arr.shape)

new_shape = (3, 2)
reshaped = np.reshape(arr, new_shape)  # From (2, 3) to (3, 2)
print(reshaped, reshaped.shape)

# Flattening - collapse every dimension into a single 1D array
flattened = reshaped.flatten()
print(flattened, flattened.shape)

In [None]:
# Indexing

# 1D array: a single index selects one element
arr1 = np.array([1, 2, 3, 4, 5])
print(arr1[0], arr1[3])

# 2D array: index with [row, column]
arr2 = np.array([[1, 2, 3], [4, 5, 6]])
print(arr1[2], arr2[1, 1])

# Fancy Indexing - pass a list of positions to pick several elements at once
idx = [0, 1, 3]
picked = arr1[idx]
print(picked)

# Boolean Indexing - a True/False mask keeps only the elements where it is True
greater_than_two = arr1 > 2
print(arr1[greater_than_two])  # Values greater than 2

is_even = (arr1 % 2) == 0
print(arr1[is_even])  # Even values

In [None]:
# Slicing - arr[start:stop:step], start is included and stop is excluded
arr = np.array([1, 2, 3, 4, 5, 6, 7])

middle = arr[2:6]  # positions 2, 3, 4, 5
print(middle)  # [3 4 5 6]

head = arr[:6]  # start omitted -> from the beginning
print(head)  # [1 2 3 4 5 6]

tail = arr[3:]  # stop omitted -> through the end
print(tail)  # [4 5 6 7]

every_other = arr[::2]  # step of 2
print(every_other)  # [1 3 5 7]

In [None]:
# Copy vs View by slicing
# Slicing a Python list produces a copy: a new, independent list.
# Slicing a NumPy array produces a view: we are looking at the original
# values, not at a new copy, so writes through the slice reach the original.

# --- Python list: the slice is a copy ---
nums = [1, 2, 3, 4, 5]
sub_list = nums[1:3]
print(sub_list)  # [2, 3]
sub_list[0] = 200  # Modify the slice only
print(sub_list)  # [200, 3]
print(nums)  # [1, 2, 3, 4, 5]
# Only the sub_list changes, the nums list does not change

# --- NumPy array: the slice is a view ---
arr = np.array([1, 2, 3, 4, 5, 6, 7])
sub_arr = arr[1:3]
print(sub_arr)  # [2 3]
sub_arr[0] = 200  # Writes into the memory shared with arr
print(sub_arr)  # [200 3]
print(arr)  # [1 200 3 4 5 6 7]
# Both the sub array and the main array changed

---

 # NumPy Common Data Types 
 - Integers: <i><b>int32, int64</b></i>
 - Floating point nums: <i><b>float32, float64</b></i>
 - Boolean: <i><b>bool</b></i>
 - Complex Nums: <i><b>complex64, complex128</b></i>
 - String: <i><b>S</b></i> (byte-str) & <i><b>U</b></i> (unicode-str)
 - Object: generic python objects - <i><b>object</b></i>

In [None]:
# Data Types
# NumPy infers one dtype for the whole array from the values it is given.
arr1 = np.array([1, 2, 3, 4, 5])  # all integers
print(arr1, arr1.dtype)

arr2 = np.array([1, 2, 3.4, 4, 5])  # a single float makes every element a float
print(arr2, arr2.dtype)

arr3 = np.array(["hello", "world", "Adi", "tree"])  # strings
print(arr3, arr3.dtype)

# Complex Numbers
arr1 = np.array([2 + 3j])
arr2 = np.array([5 + 8j])
print(arr1, arr1.dtype)

complex_sum = arr1 + arr2
print(complex_sum)

complex_difference = arr2 - arr1
print(complex_difference)

# Objects
# Better to not use String and Object data types in NumPy as NumPy is meant
# for numerical computation.
mixed_values = ["hello", {1, 2, 3}, 3.14]
arr = np.array(mixed_values)
print(arr, arr.dtype)

# Axes in Multi-Dimensional Arrays:
- 1D array has 1 axis (axis 0).
- 2D array has 2 axes (axis 0 = rows (vertical operations), axis 1 = columns (horizontal operations))
- 3D array has 3 axes (axis0 = depth/layer , axis1 = rows in each layer, axis2 = columns in each layer)

In [None]:
# Aggregating along an axis of a 2D array
arr2D = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
print(arr2D)

# No axis: every value in the array is added together
print(arr2D.sum())

# axis=0 collapses the rows -> one total per column
sum_of_columns = arr2D.sum(axis=0)
print(sum_of_columns)

# axis=1 collapses the columns -> one total per row
sum_of_rows = arr2D.sum(axis=1)
print(sum_of_rows)

# Slicing in 2D: [row slice, column slice] -> rows 0-1, column 1 only
print(arr2D[0:2, 1:2])

In [None]:
# 3D Arrays
# Two layers, each a 3 x 2 matrix -> overall shape (2, 3, 2)
first_layer = [[1, 2], [3, 4], [5, 6]]
second_layer = [[7, 8], [9, 10], [11, 12]]
arr3D = np.array([first_layer, second_layer])
print(arr3D, arr3D.shape)

# Indexing - [layer, row, column]
print(arr3D[0, 1, 1])  # 4
print(arr3D[1, 1, 0])  # 9

# Slicing - a bare ":" keeps everything along that axis
print(arr3D[:, 0, :])  # First row of every layer
print(arr3D[:, :, 0])  # First column of every layer

# Overwrite the first row of every layer with 99, then the first column of
# every layer with 100. Order matters: the element at row 0, column 0 is hit
# by both assignments and ends up as 100.
arr3D[:, 0, :] = 99
arr3D[:, :, 0] = 100
print(arr3D)

# Vectorization and Broadcasting 
<b>1. Vectorization</b>: It is the process of applying operations to entire arrays or sequences of data at once, rather than iterating through individual elements using explicit loops. 

<b>2. Broadcasting</b>: It is a mechanism that allows NumPy to perform operations on arrays of different shapes. The smaller array is treated as if it were stretched to match the larger array's shape, so you do not have to reshape it explicitly or create copies of it yourself.

<b>Broadcasting Condition</b>

For broadcasting to happen the dimensions should be compatible. Numpy compares shape element-wise. It starts dimension comparison with the trailing (i.e. rightmost) dimension and works its way left. 
Two dimensions are compatible when <ol> <li>they are equal, or</li> <li>one of them is 1.</li> </ol>

In [None]:
# Vectorization
# Each expression below works on whole arrays at once - no explicit Python loop.

arr = np.array([1, 2, 3, 4, 5])
arr2 = np.array([6, 7, 8, 9, 10])

squares = arr ** 2  # Every element squared
print(squares)

shifted = arr + 10  # 10 added to every element
print(shifted)

pairwise_sum = arr + arr2  # Elements at the same position are added together
print(pairwise_sum)

In [None]:
# Broadcasting (Scaling)

# Adding [1, 2, 3] and 5 works because 5 can be scaled to [5, 5, 5],
# which makes it compatible with the larger array.
arr = np.array([1, 2, 3, 4, 5])
arr2 = np.array([
    [1, 2, 3, 4, 5],
    [1, 2, 3, 4, 5],
])

# Shapes (5,) and (2, 5): the trailing dimensions match, so they are compatible
print(arr.shape, arr2.shape, sep="\n")

scalar_sum = arr + 5  # The scalar is applied to every element
print(scalar_sum)

row_sum = arr + arr2  # The 1D array is applied to each row of the 2D array
print(row_sum)

In [None]:
# Vector Normalization
# Normalizing an array means transforming its values so that they fit into a
# specific scale or range.

# Standard Normalization ==> (arr - mean) / stdev

arr = np.array([[1, 2], [3, 4]])
arr_mean = arr.mean()  # Mean over all elements
stdev = arr.std()  # Standard deviation over all elements

# The two scalars are broadcast across every element of arr
centered = arr - arr_mean
normalized_array = centered / stdev
print(normalized_array)

# After standard normalization the mean is 0 and the standard deviation is 1
# (up to floating-point rounding)
print(np.mean(normalized_array))
print(np.std(normalized_array))

# Mathematical Functions

## Aggregation    
- sum()
- prod()
- min()
- max()
- mean()
- std()
- var()
- median()
- argmin()
- argmax()

## Power
- square()
- sqrt()
- power(a, b)

## Log and Exponential
- log()
- log10()
- log2()
- exp()

## Rounding 
- round()
- floor()
- ceil()
- trunc()

## Extras
- unique()
- sort()
- abs()


In [None]:
# Aggregation - each call reduces the whole array to a single value

arr = np.array([1, 2, 3, 4, 5])

print(arr.sum())       # 15   -> total of all elements
print(arr.prod())      # 120  -> product of all elements
print(arr.min())       # 1    -> smallest value
print(arr.argmin())    # 0    -> index of the smallest value
print(arr.max())       # 5    -> largest value
print(arr.argmax())    # 4    -> index of the largest value
print(arr.mean())      # 3.0  -> arithmetic mean
print(np.median(arr))  # 3.0  -> middle value (no method form on the array)
print(arr.std())       # 1.41 -> standard deviation
print(arr.var())       # 2.0  -> variance

In [None]:
# Power Functions (applied element-wise to arr, defined in the Aggregation cell)
print(np.square(arr))    # [ 1  4  9 16 25]
print(np.sqrt(arr))      # [1.   1.41 1.73 2.   2.24] (rounded)
# np.power is used rather than np.pow: the np.pow alias only exists in
# NumPy 2.0 and later, while np.power is available in every NumPy version.
print(np.power(arr, 3))  # [  1   8  27  64 125]

In [None]:
# Logarithmic and Exponential Functions (all applied element-wise)
natural_log = np.log(arr)  # Natural logarithm (base e)
print(natural_log)

log_base_10 = np.log10(arr)  # Base-10 logarithm
print(log_base_10)

log_base_2 = np.log2(arr)  # Base-2 logarithm
print(log_base_2)

exponential = np.exp(arr)  # e raised to each element
print(exponential)

In [None]:
# Rounding Functions
value = 2.678

print(np.round(value))  # 3.0 -> nearest whole number
print(np.floor(value))  # 2.0 -> largest whole number not above the value
print(np.ceil(value))   # 3.0 -> smallest whole number not below the value
print(np.trunc(value))  # 2.0 -> fractional part dropped

In [None]:
# Additional Functions
arr = np.array([1, 2, -5, 3, 8, -4, 2, 5])

magnitudes = np.abs(arr)  # Absolute value of every element
print(magnitudes)  # [1 2 5 3 8 4 2 5]

ascending = np.sort(arr)  # Sorted copy; arr itself keeps its order
print(ascending)  # [-5 -4  1  2  2  3  5  8]

distinct = np.unique(arr)  # Sorted values with duplicates removed
print(distinct)  # [-5 -4  1  2  3  5  8]