# Numpy

**Vectorization**: Vectorization in NumPy refers to applying operations to entire arrays without using explicit loops. These operations are internally optimized using fast C/C++ implementations, making numerical computations more efficient and easier to write.
**Why Does Vectorization Matter?**
* Improves Performance: Eliminates Python-level loops and leverages fast low-level implementations.
* Produces Cleaner Code: Fewer lines, easier to maintain.
* Scales Better: Can efficiently handle large scientific data and machine learning workloads.

## Different ways to create arrays

In [9]:
import numpy as np
arr1 = np.array([1, 2, 3, 4, 5])
print(f"From list: {arr1}")

From list: [1 2 3 4 5]


In [76]:
arr2 = np.array([[1, 2, 3], [4, 5, 6]])
print(f"2D array:\n{arr2}")

2D array:
[[1 2 3]
 [4 5 6]]


In [77]:
arr3 = np.zeros(5)
print(f"Zeros: {arr3}")

Zeros: [0. 0. 0. 0. 0.]


In [78]:
arr4 = np.ones((3,4))
print(f"Ones: {arr4}")

Ones: [[1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]]


In [79]:
arr5 = np.arange(0, 10, 2)  # start, stop, step
print(f"Range: {arr5}")

Range: [0 2 4 6 8]


In [4]:
arr6 = np.linspace(0, 1, 5)  # start, stop, number of points (stop is included)
print(f"Linspace: {arr6}")  # spacing = (stop - start) / (num - 1) = (1 - 0) / 4 = 0.25
print(np.linspace(1, 10, 4))  # spacing = (10 - 1) / (4 - 1) = 3

Linspace: [0.   0.25 0.5  0.75 1.  ]
[ 1.  4.  7. 10.]


## SHAPES & DIMENSIONS

In [83]:
# 1D array (vector)
arr_1d = np.array([1, 2, 3, 4, 5])
print(f"1D array: {arr_1d}")
print(f"Shape: {arr_1d.shape}")  # (5,) means 5 elements
print(f"Dimensions (ndim): {arr_1d.ndim}")
print(f"Size (total elements): {arr_1d.size}")


1D array: [1 2 3 4 5]
Shape: (5,)
Dimensions (ndim): 1
Size (total elements): 5


In [84]:
# 2D array (matrix)
arr_2d = np.array([[1, 2, 3], [4, 5, 6]])
print(f"2D array:\n{arr_2d}")
print(f"Shape: {arr_2d.shape}")  # (2, 3) means 2 rows, 3 columns
print(f"Dimensions: {arr_2d.ndim}")
print(f"Size: {arr_2d.size}")

2D array:
[[1 2 3]
 [4 5 6]]
Shape: (2, 3)
Dimensions: 2
Size: 6


In [3]:
# 3D array (think of it as multiple 2D arrays stacked)
arr_3d = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])
print(f"\n3D array:\n{arr_3d}")
print(f"Shape: {arr_3d.shape}")  # (2, 2, 2)
print(f"Dimensions: {arr_3d.ndim}")


3D array:
[[[1 2]
  [3 4]]

 [[5 6]
  [7 8]]]
Shape: (2, 2, 2)
Dimensions: 3


## VECTORIZED OPERATIONS (NO LOOPS!)

In [88]:
# The OLD way (with loops) - SLOW
# Kept as an explicit Python-level loop on purpose: it is the baseline that the
# vectorized version is compared against.
print("❌ OLD WAY (Python list with loop):")
numbers = [1, 2, 3, 4, 5]
doubled = []
for num in numbers:
    # One interpreter round-trip per element: multiply, then append.
    doubled.append(num * 2)
print(f"Doubled: {doubled}")


❌ OLD WAY (Python list with loop):
Doubled: [2, 4, 6, 8, 10]


In [89]:
# The NEW way (vectorized) - FAST
print("✅ NEW WAY (NumPy vectorized):")
arr = np.array([1, 2, 3, 4, 5])
# A single array expression multiplies every element; no Python-level loop.
doubled = arr * 2
print(f"Doubled: {doubled}")

✅ NEW WAY (NumPy vectorized):
Doubled: [ 2  4  6  8 10]


In [90]:
# More vectorized operations
print(f"\nOriginal: {arr}")
print(f"Add 10: {arr + 10}")
print(f"Multiply by 3: {arr * 3}")
print(f"Square: {arr ** 2}")
print(f"Square root: {np.sqrt(arr)}")


Original: [1 2 3 4 5]
Add 10: [11 12 13 14 15]
Multiply by 3: [ 3  6  9 12 15]
Square: [ 1  4  9 16 25]
Square root: [1.         1.41421356 1.73205081 2.         2.23606798]


In [91]:
# Works with 2D arrays too!
matrix = np.array([[1, 2, 3], [4, 5, 6]])
print(f"\nOriginal matrix:\n{matrix}")
print(f"\nMatrix * 2:\n{matrix * 2}")
print(f"\nMatrix + 100:\n{matrix + 100}")



Original matrix:
[[1 2 3]
 [4 5 6]]

Matrix * 2:
[[ 2  4  6]
 [ 8 10 12]]

Matrix + 100:
[[101 102 103]
 [104 105 106]]


In [92]:
# Element-wise operations between arrays
arr1 = np.array([1, 2, 3])
arr2 = np.array([10, 20, 30])
print(f"\narr1: {arr1}")
print(f"arr2: {arr2}")
print(f"arr1 + arr2: {arr1 + arr2}")
print(f"arr1 * arr2: {arr1 * arr2}")



arr1: [1 2 3]
arr2: [10 20 30]
arr1 + arr2: [11 22 33]
arr1 * arr2: [10 40 90]


**Why is NumPy vectorization faster than traditional Python loops?**
NumPy vectorization is faster because the operations are executed in optimized, compiled C code on `contiguous memory`, which avoids the overhead of Python loops and repeated interpretation of each operation.
In other words, NumPy performs the computation in low-level C code instead of executing a slow Python loop element by element.

## np.nan and np.where
np.nan represents "Not a Number" (missing/invalid data)

In [95]:
data = np.array([1.5, 2.3, np.nan, 4.1, np.nan, 6.7])
print(f"Data with NaN values: {data}")

Data with NaN values: [1.5 2.3 nan 4.1 nan 6.7]


## Check for NaN values

In [99]:
print(f"Is NaN? {np.isnan(data)}")
print(f"Number of NaN values: {np.sum(np.isnan(data))}")
print(f"Is NaN? {type(np.isnan(data))}")

Is NaN? [False False  True False  True False]
Number of NaN values: 2
Is NaN? <class 'numpy.ndarray'>


## np.where - conditional selection
## np.where(condition, value_if_true, value_if_false)

In [103]:
ages = np.array([15, 22, 17, 45, 19, 8, 31])
print(f"\nAges: {ages}")
print(ages>18)
# Replace ages < 18 with 0, otherwise keep the age
adult_ages = np.where(ages >= 18, ages, 0)
print(f"Adult ages (< 18 = 0): {adult_ages}")
print(f"Adult ages (< 18 = 0): {type(adult_ages)}")

# Label people as "Adult" or "Minor"
labels = np.where(ages >= 18, "Adult", "Minor")
print(f"Labels: {labels}")



Ages: [15 22 17 45 19  8 31]
[False  True False  True  True False  True]
Adult ages (< 18 = 0): [ 0 22  0 45 19  0 31]
Adult ages (< 18 = 0): <class 'numpy.ndarray'>
Labels: ['Minor' 'Adult' 'Minor' 'Adult' 'Adult' 'Minor' 'Adult']


## AGGREGATIONS (mean, median, sum)

In [106]:
data = np.array([10, 20, 30, 40, 50])
print(f"\nData: {data}")
print(f"Sum: {np.sum(data)}")
print(f"Sum: {type(np.sum(data))}")
print(f"Mean (average): {np.mean(data)}")
print(f"Median (middle value): {np.median(data)}")
print(f"Min: {np.min(data)}")
print(f"Max: {np.max(data)}")
print(f"Standard deviation: {np.std(data)}")


Data: [10 20 30 40 50]
Sum: 150
Sum: <class 'numpy.int64'>
Mean (average): 30.0
Median (middle value): 30.0
Min: 10
Max: 50
Standard deviation: 14.142135623730951


## With 2D arrays - you can aggregate along axes

In [107]:
# With 2D arrays - you can aggregate along axes
matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
print(f"\nMatrix:\n{matrix}")
print(f"Sum of all elements: {np.sum(matrix)}")
print(f"Sum of each column (axis=0): {np.sum(matrix, axis=0)}")
print(f"Sum of each row (axis=1): {np.sum(matrix, axis=1)}")



Matrix:
[[1 2 3]
 [4 5 6]
 [7 8 9]]
Sum of all elements: 45
Sum of each column (axis=0): [12 15 18]
Sum of each row (axis=1): [ 6 15 24]


## Handling NaN in aggregations

In [109]:
data_with_nan = np.array([1, 2, np.nan, 4, 5])
print(f"Data with NaN: {data_with_nan}")
print(f"Regular mean: {np.mean(data_with_nan)}")  # Returns nan
print(f"NaN-safe mean: {np.nanmean(data_with_nan)}")  # Ignores nan


Data with NaN: [ 1.  2. nan  4.  5.]
Regular mean: nan
NaN-safe mean: 3.0


## NumPy arrays have specific data types

In [110]:
arr_int = np.array([1, 2, 3])
print(f"\nInteger array: {arr_int}")
print(f"Data type: {arr_int.dtype}")  # Probably int64 or int32

arr_float = np.array([1.5, 2.3, 3.7])
print(f"\nFloat array: {arr_float}")
print(f"Data type: {arr_float.dtype}")  # Probably float64

# You can specify the data type
arr_int32 = np.array([1, 2, 3], dtype=np.int32)
print(f"\nInt32 array: {arr_int32}")
print(f"Data type: {arr_int32.dtype}")

arr_float32 = np.array([1.5, 2.3, 3.7], dtype=np.float32)
print(f"\nFloat32 array: {arr_float32}")
print(f"Data type: {arr_float32.dtype}")

# Converting between types with astype()
arr = np.array([1.1, 2.9, 3.5, 4.7])
print(f"\nOriginal (float): {arr}, dtype: {arr.dtype}")

arr_int = arr.astype(np.int32)
print(f"Converted to int: {arr_int}, dtype: {arr_int.dtype}")



Integer array: [1 2 3]
Data type: int64

Float array: [1.5 2.3 3.7]
Data type: float64

Int32 array: [1 2 3]
Data type: int32

Float32 array: [1.5 2.3 3.7]
Data type: float32

Original (float): [1.1 2.9 3.5 4.7], dtype: float64
Converted to int: [1 2 3 4], dtype: int32


## MEMORY USAGE

In [112]:
# Smaller data types use less memory
arr_int64 = np.array([1, 2, 3, 4, 5] * 1000, dtype=np.int64)
arr_int32 = np.array([1, 2, 3, 4, 5] * 1000, dtype=np.int32)
print(f'arr_int64 {arr_int64}')
print(f'arr_int32 {arr_int32}')
print(f"\nArray size: {arr_int64.size} elements")
print(f"int64 memory: {arr_int64.nbytes} bytes")
print(f"int32 memory: {arr_int32.nbytes} bytes")
print(f"Memory saved: {arr_int64.nbytes - arr_int32.nbytes} bytes")
print(f"Reduction: {(1 - arr_int32.nbytes/arr_int64.nbytes) * 100:.1f}%")

# Float comparison
arr_float64 = np.array([1.5, 2.3, 3.7] * 1000, dtype=np.float64)
arr_float32 = np.array([1.5, 2.3, 3.7] * 1000, dtype=np.float32)

print(f"\nfloat64 memory: {arr_float64.nbytes} bytes")
print(f"float32 memory: {arr_float32.nbytes} bytes")
print(f"Memory saved: {arr_float64.nbytes - arr_float32.nbytes} bytes")


arr_int64 [1 2 3 ... 3 4 5]
arr_int32 [1 2 3 ... 3 4 5]

Array size: 5000 elements
int64 memory: 40000 bytes
int32 memory: 20000 bytes
Memory saved: 20000 bytes
Reduction: 50.0%

float64 memory: 24000 bytes
float32 memory: 12000 bytes
Memory saved: 12000 bytes


## MINI-PRACTICE EXERCISES

In [113]:
print("\nüìä Exercise 1: Aid Per Capita")
countries = np.array(['USA', 'China', 'India', 'Brazil'])
aid_millions = np.array([500, 300, 200, 150])  # in millions
population_millions = np.array([330, 1400, 1380, 215])  # in millions

# Vectorized division - no loop needed!
aid_per_capita = aid_millions / population_millions
print(f"Countries: {countries}")
print(f"Aid (millions): {aid_millions}")
print(f"Population (millions): {population_millions}")
print(f"Aid per capita: {aid_per_capita}")

# Exercise 2: Replace invalid values with NaN
print("\nüîß Exercise 2: Replace Invalid Values")
temperatures = np.array([25.5, -999, 30.2, -999, 22.8, 28.1])
print(f"Original temperatures: {temperatures}")

# Replace -999 (error code) with NaN
clean_temps = np.where(temperatures == -999, np.nan, temperatures)
print(f"Clean temperatures: {clean_temps}")
print(f"Average temp (ignoring NaN): {np.nanmean(clean_temps):.2f}")

# Exercise 3: Memory usage comparison
print("\nüíæ Exercise 3: Memory Usage Before/After dtype Change")
# Large array of integers (simulating real data); randint(0, 100) draws values below 100
large_data = np.random.randint(0, 100, size=1000000)

# NOTE(review): no dtype is requested above, so the "original" dtype is whatever
# NumPy defaults to on this machine (the recorded run shows int32) — the
# reduction percentage printed below depends on that default.
print(f"\nOriginal dtype: {large_data.dtype}")
print(f"Original memory: {large_data.nbytes / 1024:.2f} KB")

# Convert to smaller dtype
# int8 is signed and holds -128..127, so every generated value fits without overflow
large_data_optimized = large_data.astype(np.int8)
print(f"\nOptimized dtype: {large_data_optimized.dtype}")
print(f"Optimized memory: {large_data_optimized.nbytes / 1024:.2f} KB")
print(f"Memory saved: {(large_data.nbytes - large_data_optimized.nbytes) / 1024:.2f} KB")
print(f"Reduction: {(1 - large_data_optimized.nbytes/large_data.nbytes) * 100:.1f}%")

print("\n" + "=" * 70)
print("EXIT CHECK: Why NumPy is Faster and Cleaner Than Loops")
print("=" * 70)
print("""
‚úÖ SPEED:
- NumPy operations are implemented in C (not Python)
- Vectorized operations process all elements at once
- No Python loop overhead
- Can use multiple CPU cores

‚úÖ CLEANER CODE:
- arr * 2 instead of a for loop
- One line instead of multiple
- More readable and maintainable
- Less chance for bugs

‚úÖ MEMORY EFFICIENT:
- Arrays use less memory than lists
- Control data types (int32 vs int64)
- Optimized memory layout

üéØ EXAMPLE:
   BAD:  doubled = [x * 2 for x in numbers]
   GOOD: doubled = arr * 2

   BAD:  for i in range(len(arr)): arr[i] = arr[i] + 10
   GOOD: arr = arr + 10
""")

print("\n" + "=" * 70)
print("PRACTICE MORE!")
print("=" * 70)
print("""
Now try these on your own:
1. Create an array of 10 random numbers and find the mean
2. Create a 3x3 matrix and sum each row
3. Replace all values > 50 with 50 (cap the values)
4. Convert a float array to int and see the memory difference
""")



üìä Exercise 1: Aid Per Capita
Countries: ['USA' 'China' 'India' 'Brazil']
Aid (millions): [500 300 200 150]
Population (millions): [ 330 1400 1380  215]
Aid per capita: [1.51515152 0.21428571 0.14492754 0.69767442]

üîß Exercise 2: Replace Invalid Values
Original temperatures: [  25.5 -999.    30.2 -999.    22.8   28.1]
Clean temperatures: [25.5  nan 30.2  nan 22.8 28.1]
Average temp (ignoring NaN): 26.65

üíæ Exercise 3: Memory Usage Before/After dtype Change

Original dtype: int32
Original memory: 3906.25 KB

Optimized dtype: int8
Optimized memory: 976.56 KB
Memory saved: 2929.69 KB
Reduction: 75.0%

EXIT CHECK: Why NumPy is Faster and Cleaner Than Loops

‚úÖ SPEED:
- NumPy operations are implemented in C (not Python)
- Vectorized operations process all elements at once
- No Python loop overhead
- Can use multiple CPU cores

‚úÖ CLEANER CODE:
- arr * 2 instead of a for loop
- One line instead of multiple
- More readable and maintainable
- Less chance for bugs

‚úÖ MEMORY EFFICIE

In [6]:
print("CHALLENGE SET 1: QUICK FIRE TASKS")
print("Create a 5x5 array filled with numbers 1-25")
print("\nüí° SOLUTION:")
arr = np.arange(1, 26).reshape(5, 5)
print(arr)

CHALLENGE SET 1: QUICK FIRE TASKS
Create a 5x5 array filled with numbers 1-25

üí° SOLUTION:
[[ 1  2  3  4  5]
 [ 6  7  8  9 10]
 [11 12 13 14 15]
 [16 17 18 19 20]
 [21 22 23 24 25]]


In [6]:
print("Given: data = [5, 12, 8, 15, 3, 20, 7]")
print("Task: Extract only values greater than 10")
data = np.array([5, 12, 8, 15, 3, 20, 7])
filtered = data[data > 10]
filtered

Given: data = [5, 12, 8, 15, 3, 20, 7]
Task: Extract only values greater than 10


array([12, 15, 20])

In [8]:
# Challenge 1.3: min-max scaling, mapping the smallest score to 0 and the largest to 1.
print("\n" + "-" * 80)
print("CHALLENGE 1.3: Normalization")
print("-" * 80)
print("Given: scores = [60, 75, 80, 90, 85]")
print("Task: Normalize to 0-1 range: (value - min) / (max - min)")
print("\nüí° SOLUTION:")
scores = np.array([60, 75, 80, 90, 85])
# Pull the extremes out once, then apply the formula to the whole array at once.
lowest, highest = scores.min(), scores.max()
normalized = (scores - lowest) / (highest - lowest)
print(f"Original: {scores}")
print(f"Normalized: {normalized}")



--------------------------------------------------------------------------------
CHALLENGE 1.3: Normalization
--------------------------------------------------------------------------------
Given: scores = [60, 75, 80, 90, 85]
Task: Normalize to 0-1 range: (value - min) / (max - min)

üí° SOLUTION:
Original: [60 75 80 90 85]
Normalized: [0.         0.5        0.66666667 1.         0.83333333]


In [12]:
print("\n" + "-" * 80)
print("CHALLENGE 1.4: Boolean Mask")
print("-" * 80)
print("Given: temps = [22, 35, 18, 40, 25, 38, 19]")
print("Task: Count how many temps are in 'hot' range (> 30)")
print("\nüí° SOLUTION:")
temps = np.array([22, 35, 18, 40, 25, 38, 19])
count = np.sum( temps>30 )
print(count)


--------------------------------------------------------------------------------
CHALLENGE 1.4: Boolean Mask
--------------------------------------------------------------------------------
Given: temps = [22, 35, 18, 40, 25, 38, 19]
Task: Count how many temps are in 'hot' range (> 30)

üí° SOLUTION:
3


In [16]:
# Challenge 1.5: share of stock sold per item, reported as a percentage.
print("\n" + "-" * 80)
print("CHALLENGE 1.5: Percentage Calculation")
print("-" * 80)
print("Given: sold = [45, 78, 92, 65], total = [100, 100, 100, 100]")
print("Task: Calculate percentage sold for each item")
print("\nüí° SOLUTION:")
sold = np.array([45, 78, 92, 65])
total = np.array([100, 100, 100, 100])
# Element-wise true division: `percentage` holds the sold fraction (0-1) per item.
percentage = sold / total
# Scale to percent and append the unit so the line reads "[45. 78. 92. 65.]%".
print(f"{percentage * 100}%")


--------------------------------------------------------------------------------
CHALLENGE 1.5: Percentage Calculation
--------------------------------------------------------------------------------
Given: sold = [45, 78, 92, 65], total = [100, 100, 100, 100]
Task: Calculate percentage sold for each item

üí° SOLUTION:
[45. 78. 92. 65.]%


In [17]:
# Challenge 1.6: force every age into the closed interval [18, 65].
print("\n" + "-" * 80)
print("CHALLENGE 1.6: Clipping Values")
print("-" * 80)
print("Given: ages = [5, 12, 18, 65, 72, 88, 45]")
print("Task: Cap all ages between 18 and 65 (min=18, max=65)")
print("\nüí° SOLUTION:")
ages = np.array([5, 12, 18, 65, 72, 88, 45])
# Values below 18 become 18, values above 65 become 65, the rest are untouched.
clipped = ages.clip(18, 65)
print(f"Original: {ages}")
print(f"Clipped: {clipped}")



--------------------------------------------------------------------------------
CHALLENGE 1.6: Clipping Values
--------------------------------------------------------------------------------
Given: ages = [5, 12, 18, 65, 72, 88, 45]
Task: Cap all ages between 18 and 65 (min=18, max=65)

üí° SOLUTION:
Original: [ 5 12 18 65 72 88 45]
Clipped: [18 18 18 65 65 65 45]


In [18]:
# Challenge 1.8: bucket BMI values into four named categories.
print("\n" + "-" * 80)
print("CHALLENGE 1.8: Categorization")
print("-" * 80)
print("Given: bmi = [18.5, 22, 26, 30, 35]")
print("Task: Label as 'Underweight'(<18.5), 'Normal'(18.5-25), 'Overweight'(25-30), 'Obese'(>30)")
print("\nüí° SOLUTION:")
bmi = np.array([18.5, 22, 26, 30, 35])
# Conditions are tested in order and the first match wins, so each upper bound
# only needs to exclude values the earlier conditions have not already claimed.
# Anything matching none of them (bmi >= 30) falls through to the default.
upper_bounds = [bmi < 18.5, bmi < 25, bmi < 30]
category_names = ['Underweight', 'Normal', 'Overweight']
labels = np.select(upper_bounds, category_names, default='Obese')
print(f"BMI: {bmi}")
print(f"Categories: {labels}")


--------------------------------------------------------------------------------
CHALLENGE 1.8: Categorization
--------------------------------------------------------------------------------
Given: bmi = [18.5, 22, 26, 30, 35]
Task: Label as 'Underweight'(<18.5), 'Normal'(18.5-25), 'Overweight'(25-30), 'Obese'(>30)

üí° SOLUTION:
BMI: [18.5 22.  26.  30.  35. ]
Categories: ['Normal' 'Normal' 'Overweight' 'Obese' 'Obese']


In [20]:
np.random.rand(2)

array([0.18474917, 0.65461301])

In [21]:
np.random.rand(5,5)

array([[0.19351196, 0.49706205, 0.03015946, 0.57580239, 0.36917667],
       [0.3545475 , 0.84191129, 0.76312628, 0.3673474 , 0.55221061],
       [0.75964347, 0.28893589, 0.64298689, 0.48533489, 0.91009079],
       [0.85847154, 0.08522997, 0.70871425, 0.40762895, 0.0196009 ],
       [0.50039587, 0.40665657, 0.79693455, 0.67777658, 0.14913911]])

In [22]:
np.random.randint(1,100)

28

In [23]:
np.random.randint(1,100,10)

array([67, 47, 19, 41, 59, 51, 50, 18, 87, 18], dtype=int32)

In [24]:
arr = np.arange(25)
arr.reshape(5,5)

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24]])

In [7]:
lst = [3,4,3,234,3,423,4]
lst[0::2]=[3,1,2,3]
lst

[3, 4, 1, 234, 2, 423, 3]

In [12]:
lst_2d = np.array([
    [1,2,3],
    [4,5,6],
    [7,8,9]
])
lst_2d[1,2]

np.int64(6)

In [17]:
A = np.array([[1, 2, 3, 4],
              [5, 6, 7, 8],
              [9, 10, 11, 12]])
A[: , 1]

array([ 2,  6, 10])

In [15]:
A = np.array([[1, 2, 3, 4],
              [5, 6, 7, 8],
              [9, 10, 11, 12]])
A[:, 0:2]

array([[ 1,  2],
       [ 5,  6],
       [ 9, 10]])

In [20]:
A[:2 , -2:]

array([[3, 4],
       [7, 8]])

In [23]:
A[-2:, :2]

array([[ 5,  6],
       [ 9, 10]])

In [26]:
A[-2: , -2:]

array([[ 7,  8],
       [11, 12]])

In [31]:
A[1] 

array([5, 6, 7, 8])

In [33]:
A = np.array([[10, 11, 12, 13],
              [14, 15, 16, 17],
              [18, 19, 20, 21],
              [22, 23, 24, 25]])
A[1:3, 1:3]

array([[15, 16],
       [19, 20]])

In [34]:
A[::2, 1::2]

array([[11, 13],
       [19, 21]])

In [35]:
A[:2, -2:]

array([[12, 13],
       [16, 17]])

In [38]:
A = np.array([[1, 2, 3],
              [4, 5, 6],
              [7, 8, 9]])
B = A[1:, :]       # View
B

array([[4, 5, 6],
       [7, 8, 9]])

In [40]:
C = A[:, 1]        # View
C

array([2, 5, 8])

In [41]:
D = A[0:2, 1:3]    # View
D

array([[2, 3],
       [5, 6]])

In [44]:
# Modify through view
B[0, 0] = 999
print(A)
print(B)

[[  1   2   3]
 [999   5   6]
 [  7   8   9]]
[[999   5   6]
 [  7   8   9]]


In [46]:
# Good: explicit
def modify_in_place(arr):
    """Double ``arr`` element-wise, storing the result back into ``arr`` itself.

    The slice assignment overwrites the contents of the object that was passed
    in instead of rebinding the local name, so the caller sees the change.
    Nothing is returned.
    """
    doubled = arr * 2
    arr[:] = doubled
arr = np.array([1,2,3])
modify_in_place(arr)
arr

array([2, 4, 6])

# Slice Assignment - All Rules Explained
## NumPy arrays have fixed size. You cannot:

* Insert elements
* Delete elements
* Change total number of elements

# Rule 1: Scalar Assignment (Broadcasting)

In [48]:
a = np.array([1, 2, 3, 4, 5])
a[1:4] = 0

print(a)  # [1, 0, 0, 0, 5]

[1 0 0 0 5]


# Rule 2: Array Assignment (Must Match)

In [49]:
a = np.array([1, 2, 3, 4, 5])

# ✅ Correct: 3 positions, 3 values
a[1:4] = [10, 20, 30]
print(a)  # [1, 10, 20, 30, 5]

# ❌ Wrong: 3 positions, 2 values
a[1:4] = [10, 20]  # ValueError! Execution of this cell stops here.

# ❌ Wrong: 3 positions, 4 values
# NOTE: never reached when the cell runs top to bottom, because the line above has already raised.
a[1:4] = [10, 20, 30, 40]  # ValueError!

[ 1 10 20 30  5]


ValueError: could not broadcast input array from shape (2,) into shape (3,)

# Rule 3: 2D Scalar Assignment

In [51]:
A = np.array([[1, 2, 3],
              [4, 5, 6],
              [7, 8, 9]])
A[1, :] = 0
print(A)

[[1 2 3]
 [0 0 0]
 [7 8 9]]


In [52]:
A[:, 1] = 0
print(A)
# A still carries the zeroed middle row from the previous cell, so this prints:
# [[1, 0, 3],
#  [0, 0, 0],
#  [7, 0, 9]]

[[1 0 3]
 [0 0 0]
 [7 0 9]]


In [55]:
A[0:2, 1:3] = 0
print(A)
# A still carries the zeroed row 1 and column 1 from the previous cells, so this prints:
# [[1, 0, 0],
#  [0, 0, 0],
#  [7, 0, 9]]

[[1 0 0]
 [0 0 0]
 [7 0 9]]


# Rule 4: 2D Array Assignment

In [None]:
A = np.array([[1, 2, 3],
              [4, 5, 6],
              [7, 8, 9]])

In [56]:
A[1, :] = [10, 20, 30]
print(A)

[[ 1  0  0]
 [10 20 30]
 [ 7  0  9]]


In [57]:
A[:, 1] = [100, 200, 300]
print(A)

[[  1 100   0]
 [ 10 200  30]
 [  7 300   9]]


In [58]:
A[0:2, 1:3] = [[10, 20],
               [30, 40]]
print(A)

[[  1  10  20]
 [ 10  30  40]
 [  7 300   9]]


In [69]:
A = np.array([[1, 2, 3],
              [4, 5, 6],
              [7, 8, 9]])

# Extract the main diagonal. The flags printed below report OWNDATA False and
# WRITEABLE False: d does not own its buffer and cannot be assigned to.
d = np.diag(A)
d.base is A  # NOTE(review): result is discarded (not printed, not the cell's last expression); wrap in print() to see it
# d[:] = 0   # left disabled: d is not writeable (see flags), so this assignment would fail
print(d.flags)
# print(A)   # A is never modified in this cell

  C_CONTIGUOUS : False
  F_CONTIGUOUS : False
  OWNDATA : False
  WRITEABLE : False
  ALIGNED : True
  WRITEBACKIFCOPY : False



# Broadcasting - From Basic to Advanced
* What is Broadcasting?
  - Broadcasting is NumPy's way of performing operations on arrays of different shapes.
