# 7. Data Preprocessing with NumPy

[## PE#1 - Indexing, Assigning, np.squeeze()](#pe1---indexing-assigning-npsqueeze)

[## PE#2 - Generating Data](#pe2---generating-data)

[## PE#3 - Read and Save Data + Statistics](#pe3---read-and-save-data--statistics)

[## PE#4 - Data Manipulation](#pe4---data-manipulation)

In [86]:
import numpy as np
from numpy.random import Generator as gen  
from numpy.random import PCG64 as pcg   

np.__version__
np.set_printoptions(suppress=True, linewidth=100, precision=2)

In [87]:
# Function show_attr def

def show_attr(arrnm: str) -> str:
    '''Summarise the principal attributes of a NumPy array given its name.

    The name is resolved against this module's global namespace, so the
    array must exist at top level (e.g. in the notebook's main body).
    Arrays that only exist as locals of another function cannot be
    inspected this way.

    Parameters
    ----------
    arrnm : str
        Name of the array to inspect (any expression that evaluates to an
        object exposing ``shape``, ``ndim``, ``size`` and ``dtype`` works).

    Returns
    -------
    str
        ``' <name>: | shape: ... | ndim: ... | size: ... | dtype: ... '``,
        or an error message when ``arrnm`` is not a string.

    Raises
    ------
    NameError
        If ``arrnm`` does not refer to an existing global object.
    AttributeError
        If the resolved object lacks one of the reported attributes.
    '''
    if not isinstance(arrnm, str):
        return '-> show_attr() >> ERROR: argument must be an string!'

    # NOTE: eval() executes arbitrary code -- only pass trusted names.
    # The name is evaluated once and against the module globals only, so a
    # global that happens to be called like one of this function's locals
    # (e.g. 'attr') is not shadowed.
    target = eval(arrnm, globals())

    fields = ('shape', 'ndim', 'size', 'dtype')     # 'itemsize' left out
    details = ''.join(f'| {field}: {getattr(target, field)} ' for field in fields)
    return f' {arrnm}: {details}'

## PE#1 - Indexing, Assigning, np.squeeze()

In [None]:
# 1. Array Dimensions and Shapes
a = np.array([10])      # 1D
b = np.array(3)         # 0D
c = np.array([[12]])    # 2D

In [None]:
# 2. Arrays vs. Lists
r = np.array([[1,2], [3,4], [5,6]])
e = [[1,2], [3,4], [5,6]]
# Arrays add element-wise | list add concatenating.

In [130]:
# 3. Indexing and Assigning values in ndarrays.
s = np.array([[1,2,3], [4,5,6]])
# s[0] = ['1', '1', '1']      # => s[0] = [1,1,1] int
s[-1] = [1,2,3]
s[1] = [1,2,3]
s

array([[1, 2, 3],
       [4, 5, 6]])

In [136]:
# 4. Algebraic Ops between ndarrays.
p = np.array([[1,10,3], [1,20,4]])
q = np.array([1,2,3])
# p.reshape(3,2) + [1,1,1]    # ValueError: operands could not be broadcast together with shapes (3,2) (3,) 

In [138]:
# 5. Slicing - Slice
A = np.array([[12, -3, 4, 7], [23,31,-12,10], [24,9,18,15],
              [81,5,6,1], [-10,18,17,29]])
# display(A)
A[::2, 1::3]

array([[-3],
       [ 9],
       [18]])

In [142]:
# 6. Conditional Slicing - Boolean Indexing
S = np.array([[1,2,3,4], [12,34,23,22], [-2,5,10,90], [89,-25,-18,15]])
# S[(S > 0) & (S % 2 != 0)]
S[(S > 0) & (S % 2 == 1)]

array([ 1,  3, 23,  5, 89, 15])

## PE#2 - Generating Data

1. __numpy.empty(shape=, dtype=)__
   - .ones(); .zeros(), .full(fill_value)
2. __numpy.empty_like(arr, dtype=)__
   - ones_like(); zeros_like(), full_like(fill_value)
3. __from numpy.random import Generator as gen | PCG64 as pcg__
   - PCG64 bit generator | rg = gen(pcg(seed=42)); rg.normal(size=(3,5))
4. __random.Generator.integers(low, high=None, size=None, dtype=np.int64, endpoint=False)__ <- method
   - .random(size=, dtype=, out=None); .choice(a, size=None, p=)
5. __random.Generator.normal(loc=0.0, scale=1.0, size=None)__ <- method
   - .poisson(lam=) ; .binomial(n, p); .logistic(loc=, scale=)

In [88]:
# 1. ones_like - full_like
# 1. Knowing A create 3x3 arr which e/entry = -1
A = np.array([[12,45,6], [55,34,-4], [-6,-12,90]])
display(A)
r1 = np.full_like(A, fill_value=-1)
r2 = -np.ones_like(A)
display(r1, r2)

array([[ 12,  45,   6],
       [ 55,  34,  -4],
       [ -6, -12,  90]])

array([[-1, -1, -1],
       [-1, -1, -1],
       [-1, -1, -1]])

array([[-1, -1, -1],
       [-1, -1, -1],
       [-1, -1, -1]])

In [89]:
# 2. np.arange(start, stop, step, dtype)
# f = np.arange(start=25)     # TypeError: arange() requires stop to be specified.
f = np.arange(25)
g = np.arange(30, step=1.5, dtype=np.int32)
display(f, g)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
       23, 24])

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19])

In [90]:
# 3.  rg.normal .integers .random
# 3. generate a 5x5 arr, full of fixed vals drawn from a normal dist.
rg = gen(pcg(seed=32))
arr = rg.normal(size=(5,5))
arr

array([[ 0.91,  0.75, -0.03,  0.65, -0.65],
       [-1.83,  1.68,  0.48, -2.07, -1.06],
       [-0.62,  0.34,  1.33,  0.31, -0.43],
       [ 0.47, -0.22,  0.18, -0.67, -1.21],
       [ 1.28, -0.32,  0.12, -0.6 , -0.54]])

In [91]:
# 4. rg.choice(p=) probability
# 4. twice more likely to draw nums from t3 than t1, and 4 times more from t2 than t1
t1 = (12, 3, 4, 10)
t2 = (-5, 9)
t3 = (19, 21, 100, 89)

m = list(t1 + t2 + t3)
display(m)

rg = gen(pcg(seed=64))
arr = rg.choice(m,
                p=[0.05, 0.05, 0.05, 0.05, 0.2, 0.2, 0.1, 0.1, 0.1, 0.1],
                size=(3,3))

arr

[12, 3, 4, 10, -5, 9, 19, 21, 100, 89]

array([[ 89,   9,   3],
       [ -5, 100,   3],
       [ 89,  19,  10]])

In [92]:
# 5. Binomial distr
# 5. coin w/probability of heads 0.3 and 80 tosses, (seed=60)
rg = gen(pcg(seed=60))
arr = rg.binomial(n=80, p=0.3, size=(3,3))
np.mean(arr)

25.666666666666668

In [93]:
# 6. rg.'distributions' 
# 6. c1, c2, c3: normal, exponential, logistic
rg = gen(pcg(seed=365))
c1 = rg.normal(loc=3, scale=2, size=(500))
c2 = rg.exponential(scale=5, size=(500))
c3 = rg.logistic(loc=10, scale=4, size=(500))

t = np.array([c1, c2, c3]).T
np.max(t)

35.04153699695336

In [94]:
# XTRA
rg = gen(pcg(seed=365))

inte = rg.integers(low=-20, high=99, size=(4,6))
rand = rg.random(size=(3,5))
# display(inte, rand)       

## PE#3 - Read and Save Data + Statistics

#### 6. Importing and Saving Data with NumPy
1. __numpy.loadtxt(fname, delimiter=, usecols=, unpack=,...)__
   - (fname, dtype='float', ...)
2. __numpy.genfromtxt(fname, delimiter=, skip_header=, skip_footer=, filling_values=, usecols=,  unpack=, ...)__
   - (fname, dtype='float', ...)
3. __numpy.save(file, arr, ...)__ - .npy file
   - np.save() -> .npy file <- .load()
4. __numpy.savez(file, *args, **kwds)__ - .npz file
   - ('file', header=arr_h, data=arr_d)
   - savez(file, x, y), names will be 'arr_0', 'arr_1', etc.
5. __numpy.savetxt(fname, X, fmt='%s', delimiter=' ',...)__
   - fname with extension

#### 7. Statistics with NumPy
1. __np.sort()__
   - axis=None (flat) idem np.sort(np.reshape(X,))
2. __np.min(axis=) - np.amin(axis=)__
3. __np.max() - np.amax() .-| axis=__
4. __np.minimum(), np.maximum()__
   .reduce() .-| 
5. __np.median() .-| axis=__
6. __numpy.ptp(a, axis=None, ...)__
   (..., out=None, keepdims=<no value>)
7. __np.percentile()__
   - q=0-100, axis=
8. __np.quantile()__
   - q=0-1, axis=
9.  __np.mean(axis=)__
10. __np.average() .-| weights=, axis=__
11. __numpy.std(a, axis=None, dtype=None,...)__
   - (...,out=None, ddof=0, keepdims=<no value>, *, where=<no value>, mean=<no value>, correction=<no value>)
13. __numpy.var(a, axis=None, dtype=None, ...)__
   -(..., out=None, ddof=0, keepdims=<no value>, *, where=<no value>, mean=<no value>, correction=<no value>)
14. __np.cov() .-| m, y=None, rowvar=True .-| cov(X,X)= var(X)__
15. __np.corrcoef .-| x, y=None, rowvar=True__
16. __np.histogram .-| a, bins=, range=__
17. __np.histogram2d .-| x, y, bins=, range=__
18. __np.histogramdd .-| sample, bins, range=__
- NAN equiv funct. - np.nanvar() - np.nan... .-| jm -> np.histogram(A[~np.isnan(A)])

In [95]:
# 1. loadtxt() is faster than genfromtxt(), but breaks down if we feed it incomplete data

In [96]:
# 2. Minimum in each separate column
txt = np.genfromtxt('Lending-Company-Numeric-Data.csv', delimiter=',')
display(np.min(txt, axis=0))
display(np.minimum.reduce(txt, axis=0))
np.minimum.reduce(txt)

array([ 1000.,    35.,   365., -2870., -2870.,  -350.])

array([ 1000.,    35.,   365., -2870., -2870.,  -350.])

array([ 1000.,    35.,   365., -2870., -2870.,  -350.])

In [97]:
# 3. Percentile - Quantile
# 3. Top 5 of 20 make the team - does a score of 88 qualify?
r = np.array([[58,62,92,79,65], [91,66,72,74,84],
              [38,81,23,45,89], [49,95,86,26,90]])
percent_q = (20 - 5) / 20 * 100
np.percentile(r, percent_q)

86.75

In [98]:
# 4. .median(a) = .percentile(a, q=50)
heights = np.array([[158,162,175,179,188], [191,163,172,174,184],
                    [182,181,163,155,179], [159,195,186,166,190]])

m = np.median(heights)
p = np.percentile(heights, q=50)
q = np.quantile(heights, q=0.5)

print(f'Median: {m} - Percentil(50%): {p} - Quantil(0.5): {q}')

Median: 177.0 - Percentil(50%): 177.0 - Quantil(0.5): 177.0


In [99]:
# 5. Covariance and Correlation - .cov(X,Y); .corrcoef(X,Y)
# 5. .cov between r0 and r2 and .corrcoef between r2 and r3
arr = np.array([[1,2,-4,9], [1,12,-3,12], [1,2,3,4], [5,6,-10,3]])
display(np.cov(arr))
display(np.corrcoef(arr))
display(np.cov(arr[0], arr[2]))
np.corrcoef(arr[2], arr[3])

array([[28.67, 33.67,  3.  , 25.33],
       [33.67, 59.  ,  3.  , 40.33],
       [ 3.  ,  3.  ,  1.67, -3.67],
       [25.33, 40.33, -3.67, 55.33]])

array([[ 1.  ,  0.82,  0.43,  0.64],
       [ 0.82,  1.  ,  0.3 ,  0.71],
       [ 0.43,  0.3 ,  1.  , -0.38],
       [ 0.64,  0.71, -0.38,  1.  ]])

array([[28.67,  3.  ],
       [ 3.  ,  1.67]])

array([[ 1.  , -0.38],
       [-0.38,  1.  ]])

In [100]:
# 6. Histogram - Probability class' final exams
# 6. 35 students - 40 Qs - c_q: correct questions
# Grades: A(34-40); B(28-34); C(22-28); D(16-22); E(10-16); <10 re-take

c_q = np.array([39,30,35,21,22,3,10,3,22,4,21,29,37,34,12,4,35,7,12,19,34,39,1,12,
                11,28,2,17,3,16,9,33,5,15,9])
display(np.sort(c_q))
h1 = np.histogram(c_q, bins=5, range=(10,40))
h2 = np.histogram(c_q, bins=[0,10,16,22,28,34,40])
display(h1, h2)

array([ 1,  2,  3,  3,  3,  4,  4,  5,  7,  9,  9, 10, 11, 12, 12, 12, 15, 16, 17, 19, 21, 21, 22,
       22, 28, 29, 30, 33, 34, 34, 35, 35, 37, 39, 39])

(array([6, 5, 2, 4, 7], dtype=int64), array([10., 16., 22., 28., 34., 40.]))

(array([11,  6,  5,  2,  4,  7], dtype=int64),
 array([ 0, 10, 16, 22, 28, 34, 40]))

## PE#4 - Data Manipulation

### 8. Data Manipulation with NumPy
1. __np.loadtxt('filenm', delimiter=, dtype=, usecols=)__
   - fails with missing vals (NANs)
2. __np.isnan(arr)__ {ufunc}
   - np.isnan().sum()
3. __np.genfromtxt('filenm', delimiter=, filling_values=tmp)__
    - dtype=float, usecols=None, skip_header or footer=0
4. __np.where(condition, True, False)__
5. __np.reshape()__ <- inplace False>
    - add dims artificially -> np.reshape(1,1,3,5) + method
6. __np.delete(arr, ixs, axis=None)__ <- inplace False>
    - 2-D arr axis=0 row, axis=1 cols | .delete(.delete( axis=1) axis=0) cols and rows
7. __np.sort(arr, axis=1)__ <- inplace False>
    - -np.sort(-arr) <- descending | arr.sort() <- inplace True!! | axis=-1 default (in 2D it equals axis=1, i.e. sorts each row)
8. __np.argsort(arr, axis=-1)__ <- inplace False>
    - .argsort() method idem <- inplace False>
9.  __np.argwhere(arr)__ {arr or condition}
    - indices of non-zero elements of arr | argwhere(isnan(a))
10.  __np.random.shuffle(arr)__ <-inplace True> NO axis= (only rows)
     - Also .shuffle(arr, axis=0) Generator method <-inplace True> | 2-D array axis=0 shuffle rows, axis=1 shuffle cols.
11. __np.astype()__ <- inplace False> idem the .astype() method
12. __np.chararray.strip(arr, 'str')__ <- inplace False>
13. __np.stack(arr, axis=0)__ <- inplace False>
    - stack add 1Dim | .stack() SAME shape, .vstack() and .hstack() NO  | .dstack() depth
14. __np.concatenate((tup_arrs), axis=0)__ <-inplace False>
    - axis=0 > .vstack; axis=1 > .hstack; axis=2 > dstack
15. __np.unique(arr, return_index=False, return_counts=False)__
    - return unique values, ascending sorted

In [101]:
# 1. What is the num of NAN values in 8_A_Data_preprocessing_Numpy.csv
data_NAN = np.genfromtxt('8_A_Data_preprocessing_Numpy.csv', delimiter=';')
np.isnan(data_NAN).sum()

61

In [102]:
# 2. Find the mean on e/column
# display(np.mean(data, axis=0))      # All NANs
display(np.nanmean(data_NAN, axis=0))

  display(np.nanmean(data_NAN, axis=0))


array([     nan,    47.5 ,   365.  ,  3495.16,  5073.5 , 18461.31])

In [103]:
# 3. Replace every missing value with the minimum of the dataset, then
#    recompute the column means and the mean of column 1.
#    NOTE: np.nanmin() without axis= reduces over the whole array, so the
#    fill value is the single global minimum, not one minimum per column.
datamin = np.nanmin(data_NAN)
display(datamin)
data = np.genfromtxt('8_A_Data_preprocessing_Numpy.csv',
                     delimiter=';',
                     filling_values=datamin)    # reuse the value computed above

col_means = np.mean(data, axis=0)               # computed once, shown twice
display(col_means)
display(col_means[1])
np.mean(data[:, 1])


-1850.0

array([-1850.  ,  -258.55,  -135.16,  2460.61,  3510.13, 15185.29])

-258.5483870967742

-258.5483870967742

In [104]:
# 4. np.sort  - reshape - flat | Sort the flattened array
A = np.array([[12,34,-15,56], [23,22,90,-56],
              [4,-19,27,31], [45,56,61,70]])

# display(A)
display(np.sort(A, axis=None))
display(np.sort(np.reshape(A, (16,))))
# display(np.sort(A.flat))

array([-56, -19, -15,   4,  12,  22,  23,  27,  31,  34,  45,  56,  56,  61,  70,  90])

array([-56, -19, -15,   4,  12,  22,  23,  27,  31,  34,  45,  56,  56,  61,  70,  90])

In [105]:
# 5. Replace the original NaNs with 1 (in place).
# A boolean mask assigns to every NaN position at once, so no Python-level
# loop over the np.argwhere() indices is needed.
data_NAN[np.isnan(data_NAN)] = 1

# No NaNs should remain; summing the entries equal to 1 then shows how many
# ones the array holds (each replaced NaN contributes one of them).
display(np.isnan(data_NAN).sum())
data_NAN[data_NAN == 1].sum()

0

61.0

In [106]:
# 6. Strip str and reorder data
d = np.array([['e_q1', 'e_q2', 'e_q3', 'e_q4'],
              [1,2,3,2]])
display(d)
d[0] = np.chararray.strip(d[0], 'e_')
display(d)
display(np.stack((d[0], d[1]), axis=1))
np.dstack((d[0], d[1]))


array([['e_q1', 'e_q2', 'e_q3', 'e_q4'],
       ['1', '2', '3', '2']], dtype='<U11')

array([['q1', 'q2', 'q3', 'q4'],
       ['1', '2', '3', '2']], dtype='<U11')

array([['q1', '1'],
       ['q2', '2'],
       ['q3', '3'],
       ['q4', '2']], dtype='<U11')

array([[['q1', '1'],
        ['q2', '2'],
        ['q3', '3'],
        ['q4', '2']]], dtype='<U11')

## Some Questions - Exercises 

In [107]:
# Q1 
dataN0 = np.genfromtxt('Lending-Company-Numeric-Data-NAN.csv',
                       delimiter=';',
                       skip_header=1)
np.isnan(dataN0).sum()

260

In [108]:
# Q3 Change NAN to min
# NOTE(review): `dataN` is not created in the visible cells (Q1 builds
# `dataN0`) -- confirm it is defined before running this cell.
# display(c1_min := np.nanmin(dataN[:,0]))      # 1000.0
# Minimum of the first column (the [0:, :1] slice keeps it 2-D), ignoring NaNs.
display(c1b_min := np.nanmin(dataN[0:,:1]))
# Number of NaNs in that first column.
print(np.isnan(dataN[0:,:1]).sum())
# This loop only detects the NaNs row by row; nothing is replaced here.
# A boolean mask can do the replacement without explicit indices, e.g.
#   dataN[np.isnan(dataN[:, 0]), 0] = c1b_min
for val in dataN[0:,:1]:
    # if val == [np.nan]:       # never True, because NaN != NaN
    if np.isnan(val):
        pass
        # print(val)
        # print(val, end=' - ')

1000.0

34


In [109]:
# Q3 2nd col (col[1]) dataN mean -258.55
print('1stColMean:', np.nanmean(dataN[:,0]))
print('2ndColMean:', np.nanmean(dataN[:,1]))
print('3thColMean:', np.nanmean(dataN[:,2]))
print('4thColMean:', np.nanmean(dataN[:,3]))

1stColMean: 2250.2477700693757
2ndColMean: 46.10552763819096
3thColMean: 365.0
4thColMean: 3895.992864424057


In [110]:
# Q4 Sort a flattened version of the array - diff. alternatives
A = np.array([[12,34,-15,56], [23,22,90,-56], [4,-19,27,31], [45,56,61,70]])
display(np.sort(A, axis=None))
display(np.sort(np.reshape(A, (16,))))
#                     np.sort(A).reshape(16,)     # BAD

array([-56, -19, -15,   4,  12,  22,  23,  27,  31,  34,  45,  56,  56,  61,  70,  90])

array([-56, -19, -15,   4,  12,  22,  23,  27,  31,  34,  45,  56,  56,  61,  70,  90])

In [111]:
# Q5 Replace NANs with 1 - ELIMINATE NANs from 'dataN'
for null_pos in np.argwhere(np.isnan(dataN)):
    # dataN[null_pos[0], null_pos[1]] = 1
    pass

np.isnan(dataN).sum()

260

### 1. When using the np.sum() function on a boolean array returned by np.isnan(), the result will indicate the total number of missing values in the dataset.
- True
- Explanation: In NumPy, True can be represented with 1 and False can be represented with 0. Summing the boolean array returned by np.isnan() will count the number of True values, which corresponds to the number of missing values.

### 2. Filling missing values with the mean of the column will not change the overall mean of that column.
- True
- Explanation: Every filled-in value equals the mean of the observed values, so the overall mean of the column remains unchanged. Note that the spread of the data does change: the variance shrinks because the new values sit exactly at the mean.

### 3. What does the np.where() function do?
- It replaces values in an array based on a condition.

### 4. Setting axis=None in np.sort() on a 2D array will sort the flattened array and return a 1D array.
- True 

### 5. Which of the following best describes the default behavior of np.sort() when sorting a 2D array?
- It sorts each row of the array in ascending order.
- Explanation: The default behavior of np.sort() on a 2D array is to sort each row (the last axis) individually in ascending order.

### 6. The np.argwhere() function returns the coordinates of all non-zero elements in a NumPy array by default.
- True
- Explanation: By default, np.argwhere() checks for non-zero elements in the array and returns their coordinates as indices in a 2D array.

### 7. When using np.random.shuffle() the function returns a new array without modifying the original array.
- False
- Explanation: np.random.shuffle() modifies the array in place, meaning it rearranges the rows of the dataset directly and does not return a new array.

### 8. You have a 2D array of float values and you want to cast them into integers. Complete the following code:


In [112]:
data = np.array([[1.2, 2.5, 3.1], [4.6, 5.8, 6.9]]) 

# Convert float values to integers
int_data = data.astype(dtype = np.int32) 
print(int_data)

[[1 2 3]
 [4 5 6]]


## Some JM - Exercises

In [113]:
# 5. reshape function and method
display(A := np.arange(1,7).reshape(2,3))
np.reshape(A, (3,2))    # <- Inplace=False
display(A)
A.reshape(3,2)          # <- Inplace=False
B = A.reshape(3,2) 
B

array([[1, 2, 3],
       [4, 5, 6]])

array([[1, 2, 3],
       [4, 5, 6]])

array([[1, 2],
       [3, 4],
       [5, 6]])

In [114]:
# 7. sort function and method
display(C := np.arange(6,0,-1).reshape(2,3))
display(np.sort(C))
display(np.sort(C, axis=1))
display(np.sort(C, axis=-1))    # sort rows
C
display(np.sort(C, axis=0))     # sort cols
C
display(C.sort())
C

array([[6, 5, 4],
       [3, 2, 1]])

array([[4, 5, 6],
       [1, 2, 3]])

array([[4, 5, 6],
       [1, 2, 3]])

array([[4, 5, 6],
       [1, 2, 3]])

array([[3, 2, 1],
       [6, 5, 4]])

None

array([[4, 5, 6],
       [1, 2, 3]])

In [115]:
# sorting in descending order
display(np.flip(np.sort(C)))
display(C)
-np.sort(-C)
C

array([[3, 2, 1],
       [6, 5, 4]])

array([[4, 5, 6],
       [1, 2, 3]])

array([[4, 5, 6],
       [1, 2, 3]])

In [116]:
# Sorting letters in descending order
lst = [chr(i) for i in range(100, 115)]
display(L := np.array(lst).reshape(3,5))
display(np.sort(L))
np.flip(np.sort(L))
# numpy.flip(m, axis=None)

array([['d', 'e', 'f', 'g', 'h'],
       ['i', 'j', 'k', 'l', 'm'],
       ['n', 'o', 'p', 'q', 'r']], dtype='<U1')

array([['d', 'e', 'f', 'g', 'h'],
       ['i', 'j', 'k', 'l', 'm'],
       ['n', 'o', 'p', 'q', 'r']], dtype='<U1')

array([['r', 'q', 'p', 'o', 'n'],
       ['m', 'l', 'k', 'j', 'i'],
       ['h', 'g', 'f', 'e', 'd']], dtype='<U1')

In [117]:
# True - False element-wise letters
display(L)
L[0,3] = '' 
L[1,1] = 0
L[2,4] = False
display(L)
display(L[L == True])
display(L[L == False])
display(L[L != False])

array([['d', 'e', 'f', 'g', 'h'],
       ['i', 'j', 'k', 'l', 'm'],
       ['n', 'o', 'p', 'q', 'r']], dtype='<U1')

array([['d', 'e', 'f', '', 'h'],
       ['i', '0', 'k', 'l', 'm'],
       ['n', 'o', 'p', 'q', 'F']], dtype='<U1')

array([], dtype='<U1')

array([], dtype='<U1')

array(['d', 'e', 'f', '', 'h', 'i', '0', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'F'], dtype='<U1')

In [118]:
# True - False element-wise nums
display(R := np.random.randn(15).round(2).reshape(3,5) * 17)
R[0,3] = R[1,0] = R[2,4] = 0
display(R)
display(R[R == True])
display(R[R == False])
display(R[R != False])
display(R[R != True])

array([[  9.18,  20.06,   4.93,  -6.46,  -5.27],
       [-39.1 ,  -2.55,  33.83,  20.23,   9.86],
       [ -3.4 , -15.98,   9.69,   7.14,   9.52]])

array([[  9.18,  20.06,   4.93,   0.  ,  -5.27],
       [  0.  ,  -2.55,  33.83,  20.23,   9.86],
       [ -3.4 , -15.98,   9.69,   7.14,   0.  ]])

array([], dtype=float64)

array([0., 0., 0.])

array([  9.18,  20.06,   4.93,  -5.27,  -2.55,  33.83,  20.23,   9.86,  -3.4 , -15.98,   9.69,
         7.14])

array([  9.18,  20.06,   4.93,   0.  ,  -5.27,   0.  ,  -2.55,  33.83,  20.23,   9.86,  -3.4 ,
       -15.98,   9.69,   7.14,   0.  ])

In [119]:
# 10 .shuffle Gen method, inplace True, axis=0 rows default
display(I := np.arange(18).reshape(3,6))

from numpy.random import Generator as gen  
from numpy.random import PCG64 as pcg 

array_RG = gen(pcg())
array_RG.shuffle(I, axis=0)

I


array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17]])

array([[12, 13, 14, 15, 16, 17],
       [ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11]])

In [120]:
# display(I := np.arange(18).reshape(3,6))
# np.random.shuffle(I, axis=1)
# I
# # TypeError: shuffle() got an unexpected keyword argument 'axis'

In [121]:
# 11 .astype() method <- inplace False
display(I := np.arange(18).reshape(3,6))
display(I)
display(I.astype(dtype=str))
display(I)

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17]])

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17]])

array([['0', '1', '2', '3', '4', '5'],
       ['6', '7', '8', '9', '10', '11'],
       ['12', '13', '14', '15', '16', '17']], dtype='<U11')

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17]])

In [122]:
## 13. The truth about the 'same' shape in stacks
display(S := np.arange(24).reshape(4,6))
display(S2r := S[[-1,-2]])
# np.stack((S,S2r)) # ValueError: all input arrays must have the same shape
# np.stack((S, S2r), axis=1)    # ValueError:

np.vstack((S,S2r))  # OK!!

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23]])

array([[18, 19, 20, 21, 22, 23],
       [12, 13, 14, 15, 16, 17]])

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23],
       [18, 19, 20, 21, 22, 23],
       [12, 13, 14, 15, 16, 17]])

In [123]:
## 14. Concatenate - v-h stack <- dimensions
display(S := np.arange(24).reshape(4,6))
print(show_attr('S'))
display(S2r := S[[-1,-2]])
print(show_attr('S2r'))
display(S2c := S[:,[2,1]])
print(show_attr('S2c'))

display(concat_r := np.concatenate((S, S2r)))
print(show_attr('concat_r'))

display(vstack := np.vstack((S, S2r)))
print(show_attr('vstack'))

display(stack_r := np.stack((S, S)))
print(show_attr('stack_r'))

display(concat_c := np.concatenate((S, S2c), axis=1))
print(show_attr('concat_c'))

display(hstack := np.hstack((S, S2c)))
print(show_attr('hstack'))

display(stack_c := np.stack((S, S), axis=1))
print(show_attr('stack_c'))

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23]])

 S: | shape: (4, 6) | ndim: 2 | size: 24 | dtype: int32 


array([[18, 19, 20, 21, 22, 23],
       [12, 13, 14, 15, 16, 17]])

 S2r: | shape: (2, 6) | ndim: 2 | size: 12 | dtype: int32 


array([[ 2,  1],
       [ 8,  7],
       [14, 13],
       [20, 19]])

 S2c: | shape: (4, 2) | ndim: 2 | size: 8 | dtype: int32 


array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23],
       [18, 19, 20, 21, 22, 23],
       [12, 13, 14, 15, 16, 17]])

 concat_r: | shape: (6, 6) | ndim: 2 | size: 36 | dtype: int32 


array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23],
       [18, 19, 20, 21, 22, 23],
       [12, 13, 14, 15, 16, 17]])

 vstack: | shape: (6, 6) | ndim: 2 | size: 36 | dtype: int32 


array([[[ 0,  1,  2,  3,  4,  5],
        [ 6,  7,  8,  9, 10, 11],
        [12, 13, 14, 15, 16, 17],
        [18, 19, 20, 21, 22, 23]],

       [[ 0,  1,  2,  3,  4,  5],
        [ 6,  7,  8,  9, 10, 11],
        [12, 13, 14, 15, 16, 17],
        [18, 19, 20, 21, 22, 23]]])

 stack_r: | shape: (2, 4, 6) | ndim: 3 | size: 48 | dtype: int32 


array([[ 0,  1,  2,  3,  4,  5,  2,  1],
       [ 6,  7,  8,  9, 10, 11,  8,  7],
       [12, 13, 14, 15, 16, 17, 14, 13],
       [18, 19, 20, 21, 22, 23, 20, 19]])

 concat_c: | shape: (4, 8) | ndim: 2 | size: 32 | dtype: int32 


array([[ 0,  1,  2,  3,  4,  5,  2,  1],
       [ 6,  7,  8,  9, 10, 11,  8,  7],
       [12, 13, 14, 15, 16, 17, 14, 13],
       [18, 19, 20, 21, 22, 23, 20, 19]])

 hstack: | shape: (4, 8) | ndim: 2 | size: 32 | dtype: int32 


array([[[ 0,  1,  2,  3,  4,  5],
        [ 0,  1,  2,  3,  4,  5]],

       [[ 6,  7,  8,  9, 10, 11],
        [ 6,  7,  8,  9, 10, 11]],

       [[12, 13, 14, 15, 16, 17],
        [12, 13, 14, 15, 16, 17]],

       [[18, 19, 20, 21, 22, 23],
        [18, 19, 20, 21, 22, 23]]])

 stack_c: | shape: (4, 2, 6) | ndim: 3 | size: 48 | dtype: int32 
