## **File I/O**

In [6]:
import numpy as np

# Small 1-D integer array holding 0, 1, ..., 9
arr = np.arange(10)

# Round-trip the array through NumPy's binary .npy format:
# np.save writes it to disk and np.load reads it straight back.
np.save('my_array.npy', arr)
loaded_arr = np.load('my_array.npy')

# Show both arrays side by side so the round trip can be checked by eye
print("Original array:", arr)
print("Loaded array:  ", loaded_arr)
import numpy as np  # NumPy provides the arrays and the save/load helpers used below

# Two related 1-D arrays: 0..9 and the same values doubled (0, 2, ..., 18)
arr = np.arange(10)  # np.arange(n) yields the integers 0 to n-1
arr2 = arr * 2       # element-wise multiplication

# Bundle both arrays into a single .npz archive. The keyword names ('x', 'y')
# become the keys used to retrieve each array later.
# Note: np.savez stores the arrays UNCOMPRESSED; use np.savez_compressed
# instead when a compressed archive is wanted.
np.savez('my_archive.npz', x=arr, y=arr2)

# np.load on an .npz file returns a dictionary-like NpzFile that keeps the
# underlying file open. Using it as a context manager guarantees the file is
# closed when the block ends, even if one of the lookups below raises.
with np.load('my_archive.npz') as data:
    # Arrays are looked up by the keywords they were saved under
    print(data['x'])  # the original array [0 1 2 ... 9]
    print(data['y'])  # the doubled array  [0 2 4 ... 18]

# 3x4 integer matrix holding 0..11, filled row by row
arr2d = np.reshape(np.arange(12), (3, 4))

# Write the matrix as comma-separated text:
#   delimiter=','        -> values on a row are separated by commas
#   fmt='%.2f'           -> every value is written with two decimal places
#   header='c1,c2,c3,c4' -> first line of the file names the four columns
#   comments=''          -> the header is written without a leading '#'
np.savetxt('my_array.csv', arr2d, delimiter=',', fmt='%.2f',
           header='c1,c2,c3,c4', comments='')

# Read the numbers back, skipping the single header line
loaded_txt = np.loadtxt('my_array.csv', delimiter=',', skiprows=1)

# Display the matrix that was read from disk
print(loaded_txt)


Original array: [0 1 2 3 4 5 6 7 8 9]
Loaded array:   [0 1 2 3 4 5 6 7 8 9]
[0 1 2 3 4 5 6 7 8 9]
[ 0  2  4  6  8 10 12 14 16 18]


## Statistics & Advanced Operations (Common in DS/ML/AI)

In [8]:
import numpy as np  # Import NumPy library

# Sample with one missing value (NaN), used to contrast plain and NaN-aware reductions
data = np.array([1, 2, 2, 3, 3, 4, 5, np.nan, 6])

# Plain median: the NaN propagates, so the result is nan
print(np.median(data))  # Output: nan

# NaN-aware median: the missing value is skipped
print(np.nanmedian(data))  # Output: 3.0

# 25th, 50th and 75th percentiles of the non-NaN values.
# np.nanpercentile skips NaNs itself, matching the other nan* reductions here.
print(np.nanpercentile(data, [25, 50, 75]))
# Output: [2.   3.   4.25] — the 75th percentile is interpolated between 4 and 5

# Boolean mask marking which elements of 'data' are NaN
print(np.isnan(data))
# Output: [False False False False False False False  True False]

# Boolean mask marking infinite elements (positive or negative infinity)
print(np.isinf(np.array([1, np.inf])))
# Output: [False  True]

# Sum of the array, ignoring NaNs
print(np.nansum(data))  # Output: 26.0

# Mean of the array, ignoring NaNs (26 / 8 valid values)
print(np.nanmean(data))  # Output: 3.25

# Replace NaN with 0.0, +inf with 1e6 and -inf with -1e6.
# 'data' contains no infinities, so only the NaN entry changes here.
print(np.nan_to_num(data, nan=0.0, posinf=1e6, neginf=-1e6))
# Output: [1. 2. 2. 3. 3. 4. 5. 0. 6.]

# Unique values and how often each occurs, computed on the non-NaN entries
values, counts = np.unique(data[~np.isnan(data)], return_counts=True)
print(f"Values: {values}, Counts: {counts}")
# Output: Values: [1. 2. 3. 4. 5. 6.], Counts: [1 2 2 1 1 1]

# Non-negative integer samples to tally, plus one weight per sample
int_data = np.array([0, 1, 1, 3, 2, 1, 7])
weights = np.array([0.1, 0.5, 0.2, 0.3, 1.0, 0.8, 0.9])

# Unweighted tally: slot i holds how many times the value i occurs
print(np.bincount(int_data))
# Output: [1 3 1 1 0 0 0 1]

# Weighted tally: slot i holds the sum of the weights of all samples equal to i
print(np.bincount(int_data, weights=weights))
# Output: [0.1 1.5 1.  0.3 0.  0.  0.  0.9]

# Bin edges and the sample values to classify against them
bins = np.array([0, 3, 6])
data_to_bin = np.array([-1, 0, 1, 2, 3, 4, 5, 6, 7])

# For each sample, find the index of the bin it falls into
binned_indices = np.digitize(data_to_bin, bins=bins)
print(binned_indices)
# Output: [0 1 1 1 2 2 2 3 3]
# Index i means bins[i-1] <= value < bins[i]; 0 means the value lies below the
# first edge and len(bins) means it is at or above the last edge.

# Element-wise choice: keep values above 3, multiply everything else by 10.
# NaN > 3 evaluates to False, so the NaN entry takes the "else" branch and stays NaN.
condition = data > 3
print(np.where(condition, data, 10 * data))
# Output: [10. 20. 20. 30. 30.  4.  5. nan  6.]

# Any comparison against NaN is False, so the NaN entry cannot make any() True...
print((data > 5).any())  # Output: True (6 is greater than 5)

# ...but it does make all() False
print((data > 0).all())  # Output: False

# Dropping the NaN first answers the question for the real values only
print((data[~np.isnan(data)] > 0).all())  # Output: True

# 0.1 + 0.2 is not exactly 0.3 in binary floating point, so exact equality
# fails while a tolerance-based comparison succeeds.
a = np.array([0.1 + 0.2])
b = np.array([0.3])

print(np.equal(a, b))     # Output: [False] (exact, element-wise comparison)
print(np.allclose(a, b))  # Output: True (equal within a small tolerance)

# Treat two 1-D arrays as sets
set1 = np.array([1, 2, 3, 4])
set2 = np.array([3, 4, 5, 6])

# Run each set operation on (set1, set2) and print its sorted result:
#   intersect1d -> elements present in both arrays      [3 4]
#   union1d     -> all distinct elements of either      [1 2 3 4 5 6]
#   setdiff1d   -> elements of set1 that are not in set2 [1 2]
#   setdiff1d   -> the same difference, shown a second time [1 2]
for set_op in (np.intersect1d, np.union1d, np.setdiff1d, np.setdiff1d):
    print(set_op(set1, set2))

# 2x3 demo matrix: [[0, 1, 2], [3, 4, 5]]
arr = np.arange(6).reshape(2, 3)


def my_func(x):
    """Return the range of ``x``: its largest element minus its smallest."""
    largest = x.max()
    smallest = x.min()
    return largest - smallest


# axis=1: my_func is called once per row, giving one value per row
print(np.apply_along_axis(my_func, 1, arr))
# Output: [2 2]

# axis=0: my_func is called once per column, giving one value per column
print(np.apply_along_axis(my_func, 0, arr))
# Output: [3 3 3]

# Coordinate vectors: three x positions and two y positions
x = np.array([0, 1, 2])
y = np.array([0, 1])

# Expand them into 2-D coordinate grids of shape (len(y), len(x)):
# xv repeats x along every row, yv repeats y down every column.
# indexing='xy' is NumPy's default, spelled out here for clarity.
xv, yv = np.meshgrid(x, y, indexing='xy')

# Show both grids
print("xv (grid of x values):\n", xv)
print("yv (grid of y values):\n", yv)


nan
3.0
[2.   3.   4.25]
[False False False False False False False  True False]
[False  True]
26.0
3.25
[1. 2. 2. 3. 3. 4. 5. 0. 6.]
Values: [1. 2. 3. 4. 5. 6.], Counts: [1 2 2 1 1 1]
[1 3 1 1 0 0 0 1]
[0.1 1.5 1.  0.3 0.  0.  0.  0.9]
[0 1 1 1 2 2 2 3 3]
[10. 20. 20. 30. 30.  4.  5. nan  6.]
True
False
True
[False]
True
[3 4]
[1 2 3 4 5 6]
[1 2]
[1 2]
[2 2]
[3 3 3]
xv (grid of x values):
 [[0 1 2]
 [0 1 2]]
yv (grid of y values):
 [[0 0 0]
 [1 1 1]]
