# NUMPY

In [2]:
import numpy as np

## Creation

### List & Tuple

In [3]:
# Build 1-D arrays from the two built-in Python sequence types.
list_array = np.array([1, 2, 3, 4, 5])      # source is a list
tuple_array = np.array((6, 7, 8, 9, 10))    # source is a tuple

# Both inputs produce the same kind of object: a one-dimensional ndarray.
print(f"Array from list: {list_array}")
print(f"Array from tuple: {tuple_array}")

Array from list: [1 2 3 4 5]
Array from tuple: [ 6  7  8  9 10]


### Built-in Functions

In [4]:
# Constructors that create common arrays without a Python sequence as input.
zeros_array = np.zeros(shape=5)                        # five 0.0 values
ones_array = np.ones(shape=5)                          # five 1.0 values
range_array = np.arange(10)                            # integers 0..9
linspace_array = np.linspace(start=0, stop=1, num=5)   # 5 evenly spaced values from 0 to 1
identity_matrix = np.eye(N=3)                          # 3x3 identity matrix

# Report every array; the matrix goes on its own lines for readability.
print(f"Array of zeros: {zeros_array}")
print(f"Array of ones: {ones_array}")
print(f"Array with range: {range_array}")
print(f"Array with linearly spaced values: {linspace_array}")
print(f"Identity matrix:\n {identity_matrix}")

Array of zeros: [0. 0. 0. 0. 0.]
Array of ones: [1. 1. 1. 1. 1.]
Array with range: [0 1 2 3 4 5 6 7 8 9]
Array with linearly spaced values: [0.   0.25 0.5  0.75 1.  ]
Identity matrix:
 [[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]


### Random Built-in Functions

In [5]:
# Draw three small samples from NumPy's global random generator.
# No seed is set, so the printed numbers change on every run.
rand_array = np.random.rand(5)                      # floats between 0 and 1
randint_array = np.random.randint(0, 10, size=5)    # integers from 0 up to (not including) 10
normal_array = np.random.randn(5)                   # samples from the standard normal distribution

print(f"Random array: {rand_array}")
print(f"Random integer array: {randint_array}")
print(f"Array from normal distribution: {normal_array}")

Random array: [0.01273621 0.60833842 0.79372128 0.05582399 0.87055862]
Random integer array: [6 1 8 3 7]
Array from normal distribution: [-0.74914634 -1.63495157 -2.47470123 -0.70718899 -0.75701803]


### Special Built-in Functions

In [6]:
# Two ways to duplicate data:
#   np.repeat duplicates element by element,
#   np.tile duplicates the sequence as a whole.
repeat_array = np.repeat([1, 2, 3], repeats=3)
tile_array = np.tile([1, 2, 3], reps=3)

print(f"Repeated array: {repeat_array}")
print(f"Tiled array: {tile_array}")

Repeated array: [1 1 1 2 2 2 3 3 3]
Tiled array: [1 2 3 1 2 3 1 2 3]


### Creating from Functions
- Builds an array in which each element is computed from its own index (here `x * y` for the element at position `(x, y)`).

In [7]:
def generate_values(x, y):
    """Return the product of the two index arguments.

    np.fromfunction passes the row indices as ``x`` and the column indices
    as ``y``, so the result holds ``row * column`` at every position.
    """
    product = x * y
    return product


# Fill a 5x5 integer table by evaluating generate_values on the index grid.
function_array = np.fromfunction(generate_values, shape=(5, 5), dtype=int)
print(f"Array from function:\n {function_array}")

Array from function:
 [[ 0  0  0  0  0]
 [ 0  1  2  3  4]
 [ 0  2  4  6  8]
 [ 0  3  6  9 12]
 [ 0  4  8 12 16]]


## Data Type

### List

In [8]:
# A Python list can hold values of unrelated types side by side.
python_list = [
    1,                  # int
    'a',                # str
    3.14,               # float
    [5, 6, 7],          # nested list
    {'name': 'John'},   # dict
]
python_list

[1, 'a', 3.14, [5, 6, 7], {'name': 'John'}]

### NumPy Array

In [9]:
import numpy as np
# Unlike the list above, an array has one dtype for all of its elements;
# here the integers are stored as 32-bit floats.
arr = np.array([1, 2, 3], dtype="float32")

In [10]:
# Appending numbers to the float array: the result is a new array holding
# the old values followed by the appended ones.
new_arr = np.append(arr, values=[4, 5, 6])
print(new_arr)

[1. 2. 3. 4. 5. 6.]


In [11]:
# Appending strings to a numeric array: the array still needs one dtype for
# every element, so the numbers are converted to strings as well.
new_arr = np.append(arr, values=['s', 'd', 'p'])
print(new_arr, new_arr.dtype, sep="\n")

['1.0' '2.0' '3.0' 's' 'd' 'p']
<U32


In [12]:
# new_arr now holds strings, and the numeric ufunc np.abs has no
# implementation ("loop") for string data, so the call fails.
# The failure is the point of this cell: catch it and print it so that
# "Restart & Run All" can continue past this cell.
try:
    print(np.abs(new_arr))
except TypeError as err:  # the UFuncTypeError raised here is a TypeError
    print(f"{type(err).__name__}: {err}")

UFuncTypeError: ufunc 'absolute' did not contain a loop with signature matching types <class 'numpy.dtype[str_]'> -> None

## Insert Data

In [13]:
# np.append leaves its input untouched: arr keeps its three elements and the
# extended data comes back as a separate array.
arr = np.array([1, 2, 3])
new_arr = np.append(arr, values=[4, 5, 6])

print(f"Original Array: {arr}")
print(f"New Array: {new_arr}")

Original Array: [1 2 3]
New Array: [1 2 3 4 5 6]


In [14]:
# np.insert places the new values before the given position (index 1 here)
# and, like np.append, returns a new array instead of changing arr.
arr = np.array([1, 2, 3])
new_arr = np.insert(arr, obj=1, values=[4, 5])

print(f"Original Array: {arr}")
print(f"New Array: {new_arr}")

Original Array: [1 2 3]
New Array: [1 4 5 2 3]


In [15]:
# Join two 1-D arrays end to end.
arr1, arr2 = np.array([1, 2, 3]), np.array([4, 5, 6])

merged_arr = np.hstack([arr1, arr2])
print(f"Merged Array: {merged_arr}")

Merged Array: [1 2 3 4 5 6]


## Indexing

In [16]:
# The same five values as a plain Python list and as a NumPy array,
# used side by side in the indexing examples that follow.
l = list(range(1, 6))
arr = np.array(l)

### Basic

In [17]:
# Single-element access and start:stop:step slicing are written the same way
# for a Python list and a NumPy array; each line prints the list result first
# and the array result second.
print(l[1], arr[1])
print(l[1:4:2], arr[1:4:2])

2 2
[2, 4] [2 4]


### Boolean Indexing 

In [18]:
# Boolean indexing: `arr > 3` is evaluated per element and only the elements
# for which it is True are kept.
print(arr[arr > 3])  # Outputs [4 5]

[4 5]


In [19]:
# Store the result of the comparison on its own. Despite the name, index_list
# is not a Python list: printing it shows one True/False flag per element of arr.
index_list = arr > 3
print(index_list)

[False False False  True  True]


In [20]:
# Indexing with the stored flags selects the same elements as arr[arr > 3].
print(arr[index_list])

[4 5]


In [21]:
# A plain Python list has no element-wise comparison, so `l > 3` raises
# TypeError before any indexing happens. The error is the lesson of this
# cell: catch it and print it so the notebook still runs top to bottom.
try:
    print(l[l > 3])
except TypeError as err:
    print(f"{type(err).__name__}: {err}")

TypeError: '>' not supported between instances of 'list' and 'int'

### Fancy Indexing
- index arrays using integer arrays

In [22]:
# Fancy indexing: an integer array selects the elements at those positions.
indices = np.array([1, 3])
print(arr[indices])  # Outputs [2 4] (positions 1 and 3 of [1 2 3 4 5])

[2 4]


In [23]:
# The same idea does not work on a plain Python list: a list can only be
# indexed with an integer or a slice, so this raises TypeError. The error is
# the lesson of this cell: catch it and print it so the notebook still runs
# top to bottom.
indices = [1, 3]
try:
    print(l[indices])
except TypeError as err:
    print(f"{type(err).__name__}: {err}")

TypeError: list indices must be integers or slices, not list

## Matrix

### Element-wise Operations

In [24]:
# Two 2x2 matrices; B is filled with the constant 2.
A = np.array([
    [1, 2],
    [3, 4],
])
B = np.full((2, 2), 2)

# + and * combine the matrices position by position
# (element-wise addition first, element-wise multiplication second).
print(A + B, A * B, sep="\n")

[[3 4]
 [5 6]]
[[2 4]
 [6 8]]


### Matrix Multiplications

In [25]:
# Matrix product, as opposed to the element-wise product A * B.
# Both spellings print the same result.
print(A.dot(B))   # Matrix multiplication via the .dot method
print(A @ B)      # The same product written with the @ operator

[[ 6  6]
 [14 14]]
[[ 6  6]
 [14 14]]


### Advanced Operations 

In [26]:
# .T swaps rows and columns.
print(A.T)  # Transpose of matrix A

[[1 3]
 [2 4]]


In [27]:
from numpy.linalg import det, inv, matrix_rank
from numpy import trace

# Four standard summaries of A, printed one per line.
print(
    det(A),           # Determinant of A
    inv(A),           # Inverse of A (if it exists)
    matrix_rank(A),   # Rank of A
    trace(A),         # Trace of A
    sep="\n",
)

-2.0000000000000004
[[-2.   1. ]
 [ 1.5 -0.5]]
2
5


## Vectorization
### Problem:
Let's say you have two sets of points: A and B. You want to compute the Euclidean distances between each point in A and each point in B.

### Non-Vectorized 

In [None]:
# Two sets of three 2-D points, one point per row.
A = np.array([
    [1, 2],
    [3, 4],
    [5, 6],
])
B = np.array([
    [7, 8],
    [9, 10],
    [11, 12],
])
print(A.shape)

In [28]:
# Reference implementation: visit every (point of A, point of B) pair in
# Python and store the Euclidean distance at [row of A, row of B].
distances = np.empty((len(A), len(B)))

for i, point_a in enumerate(A):
    for j, point_b in enumerate(B):
        distances[i, j] = np.sqrt(np.square(point_a - point_b).sum())

print(distances)

[[ 8.48528137 11.3137085  14.14213562]
 [ 5.65685425  8.48528137 11.3137085 ]
 [ 2.82842712  5.65685425  8.48528137]]


### Vectorized

In [None]:
# Insert a length-1 axis in the middle of A so it can be broadcast against B.
# With the (3, 2) array A defined for this section this evaluates to (3, 1, 2).
A[:, np.newaxis, :].shape

In [None]:
# Insert a length-1 axis at the front of B.
# With the (3, 2) array B defined for this section this evaluates to (1, 3, 2).
B[np.newaxis, :, :].shape

In [None]:
# Subtracting the two reshaped arrays pairs every row of A with every row
# of B: (3, 1, 2) - (1, 3, 2) broadcasts to (3, 3, 2), i.e. one coordinate
# difference vector per (A point, B point) pair.
differences = A[:, np.newaxis, :] - B[np.newaxis, :, :]
differences.shape

In [29]:
# Vectorized pairwise distances: the same result as the nested loops,
# without a Python-level loop.

# Step 1: broadcast so every row of A meets every row of B.
differences = A[:, np.newaxis, :] - B[np.newaxis, :, :]

# Step 2: square each coordinate difference, then add up over the last axis.
squared_differences = np.square(differences)
summed_squared_differences = np.sum(squared_differences, axis=-1)

# Step 3: the square root of each sum is the Euclidean distance.
distances_vectorized = np.sqrt(summed_squared_differences)

print(distances_vectorized)

[[ 8.48528137 11.3137085  14.14213562]
 [ 5.65685425  8.48528137 11.3137085 ]
 [ 2.82842712  5.65685425  8.48528137]]


### Performance Difference

In [30]:
import time

# Generate larger random datasets: 1000 two-dimensional points per set,
# i.e. 1,000,000 point pairs to measure.
A_large = np.random.rand(1000, 2)
B_large = np.random.rand(1000, 2)

# time.perf_counter() is used instead of time.time(): it is a monotonic,
# high-resolution clock meant for measuring elapsed intervals, whereas
# time.time() is wall-clock time and can have coarse resolution or jump
# when the system clock is adjusted.

# Time the non-vectorized version: one distance per Python loop iteration.
# The result is discarded; only the elapsed time matters here.
start_time = time.perf_counter()
for i in range(A_large.shape[0]):
    for j in range(B_large.shape[0]):
        _ = np.sqrt(np.sum((A_large[i] - B_large[j])**2))
non_vectorized_time = time.perf_counter() - start_time

# Time the vectorized version: the same distances computed with whole-array
# operations via broadcasting.
start_time = time.perf_counter()
differences = A_large[:, np.newaxis, :] - B_large[np.newaxis, :, :]
squared_differences = differences ** 2
summed_squared_differences = squared_differences.sum(axis=-1)
_ = np.sqrt(summed_squared_differences)
vectorized_time = time.perf_counter() - start_time

print(f"Non-vectorized time: {non_vectorized_time:.4f} seconds")
print(f"Vectorized time: {vectorized_time:.4f} seconds")

Non-vectorized time: 4.9085 seconds
Vectorized time: 0.0401 seconds


## Broadcasting

### Example 1

In [None]:
# Scalar broadcasting: the single number b is combined with every element of a.
a, b = np.array([1, 2, 3]), 2

result = a * b
print(result)  # prints [2 4 6]

In [None]:
# The same product with b written out as a full array: the shapes already
# match, so this is an ordinary element-by-element multiplication.
a, b = np.array([1, 2, 3]), np.array([2, 2, 2])

result = a * b
print(result)  # prints [2 4 6]

### Example 2

In [None]:
# Row broadcasting: b has one value per column of A and is applied to each
# of A's three rows (column 0 is multiplied by 2, column 1 by 3).
A = np.array([
    [1, 2],
    [3, 4],
    [5, 6],
])
b = np.array([2, 3])

result = A * b
print(result)

### Example 3

In [None]:
# Column broadcasting: b is a 3x1 column, so its i-th value is added to
# every element of the i-th row of A.
A = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])
b = np.array([
    [1],
    [2],
    [3],
])

result = A + b
print(result)

### Example 4

In [None]:
# b is a 2x1 column, which does not line up with the 3x2 matrix A.
# Its transpose is a 1x2 row with one value per column of A, and that row
# is added to each of A's three rows.
A = np.array([
    [1, 2],
    [3, 4],
    [5, 6],
])
b = np.array([
    [1],
    [2],
])

result = A + b.T
print(result)
print(result)