# Data Manipulation

In [17]:
!pip install numpy --upgrade



In [1]:
import numpy as np

### numpy.copy()
- Return an array copy of the given object.
- **Syntax**: `numpy.copy(a, order='K', subok=False)`

In [4]:
x = np.array([1, 2, 3])
y = x           # plain assignment: y is just another name for the same array object
z = np.copy(x)  # np.copy returns a new array holding its own copy of the data
z

array([1, 2, 3])

In [5]:
x[0] = 10     # modify x in place
x[0] == y[0]  # y was bound with `y = x`, so it refers to the same array and sees the change

np.True_

In [6]:
x[0] == z[0]  # z was created with np.copy before x was modified, so z[0] is still 1

np.False_

In [8]:
a = np.array([1,2,3])
b = a.copy()  # method form of copying: b gets its own data
b[0] = 0      # only b changes; a keeps its original values
a,b

(array([1, 2, 3]), array([0, 2, 3]))

### numpy.ravel()
- Return a flattened array.
- **Syntax**: `numpy.ravel(a, order='C')`

In [19]:
x = np.array([[1,2],[3,4],[5,6]])  # 3 rows x 2 columns
np.ravel(x)  # default order='C': elements are read row by row

array([1, 2, 3, 4, 5, 6])

In [20]:
x.reshape(-1)  # same result as np.ravel(x): -1 lets NumPy infer the length of the single axis

array([1, 2, 3, 4, 5, 6])

In [21]:
np.ravel(x, order='F')  # Fortran (column-major) order: elements are read column by column

array([1, 3, 5, 2, 4, 6])

In [22]:
# When order is 'A', ravel preserves the array's existing 'C' or 'F' ordering.
# x.T is Fortran-ordered here, so it is read in Fortran index order,
# which gives back the element order of the original x.
np.ravel(x.T, order='A')  # .T is the transpose

array([1, 2, 3, 4, 5, 6])

In [23]:
# When order is 'K', ravel preserves orderings that are neither 'C' nor 'F',
# but it won't reverse axes: a reversed 1-D view is still read from its first
# logical element to its last, so 'K' gives the same result as 'C' here.
a = np.arange(3)[::-1]  # reversed view: array([2, 1, 0])
a.ravel(order='C'), a.ravel(order='K')

(array([2, 1, 0]), array([2, 1, 0]))

In [24]:
# After swapaxes, `a` is a (2, 2, 3) view whose memory layout is neither C- nor
# Fortran-contiguous, so the orders below give different results:
#   'A' -> falls back to C index order, because `a` is not Fortran-contiguous
#   'K' -> elements in the order they occur in memory (0..11)
#   'C' -> row-major index order (last index changes fastest)
#   'F' -> column-major index order (first index changes fastest)
a = np.arange(12).reshape(2,3,2).swapaxes(1,2); a
a.ravel(order='A'),a.ravel(order='K'),a.ravel(order='C'), a.ravel(order='F')

(array([ 0,  2,  4,  1,  3,  5,  6,  8, 10,  7,  9, 11]),
 array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11]),
 array([ 0,  2,  4,  1,  3,  5,  6,  8, 10,  7,  9, 11]),
 array([ 0,  6,  1,  7,  2,  8,  3,  9,  4, 10,  5, 11]))

### numpy.reshape()
- Gives a new shape to an array without changing its data.
- **Syntax**:  `numpy.reshape(a, /, shape=None, order='C', *, newshape=None, copy=None)`
  - a - The array to reshape.
  - shape - The new shape.
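  - order - Index order used to read and place the elements: 'C' (row-major), 'F' (column-major) or 'A' (Fortran order only if `a` is Fortran-contiguous in memory, C order otherwise). May be passed positionally or by keyword.
  - newshape - Deprecated alias for `shape` (deprecated since NumPy 2.1), kept for backward compatibility. Keyword-only.
  - copy - Whether to force a copy (`True`), copy only if needed (`None`, the default), or raise `ValueError` if a copy cannot be avoided (`False`). Keyword-only.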
  - The `/` in a function signature means that everything before it must be passed as a positional argument, not as a keyword argument.



In [25]:
a = np.arange(6).reshape((3, 2))  # values 0..5 arranged as 3 rows x 2 columns
a

array([[0, 1],
       [2, 3],
       [4, 5]])

In [26]:
np.reshape(a,(2,3)) # C-like index ordering

array([[0, 1, 2],
       [3, 4, 5]])

In [27]:
np.reshape(np.ravel(a), (2, 3)) # equivalent to C ravel then C reshape

array([[0, 1, 2],
       [3, 4, 5]])

In [28]:
np.reshape(a, (2, 3), order='F') # Fortran-like index ordering

array([[0, 4, 3],
       [2, 1, 5]])

In [29]:
np.reshape(np.ravel(a, order='F'), (2, 3), order='F')  # equivalent to F ravel then F reshape

array([[0, 4, 3],
       [2, 1, 5]])

In [30]:
a = np.array([[1,2,3], [4,5,6]])
np.reshape(a, 6)  # an integer shape produces a 1-D array of that length

array([1, 2, 3, 4, 5, 6])

In [31]:
np.reshape(a, (3,-1)) # the unspecified value is inferred to be 2

array([[1, 2],
       [3, 4],
       [5, 6]])

### flatten()
- Return a copy of the array collapsed into one dimension (multi-dimensional → 1D).
- **Syntax**: `numpy.ndarray.flatten(order='C')`

In [40]:
a = np.array([[1,2],[3,4]])
a.flatten()  # default order='C': rows are laid out one after another
# np.ndarray.flatten(a) is the equivalent call through the class and would also work

array([1, 2, 3, 4])

### stack()
- Join a sequence of arrays along a new axis.
- **Syntax**: `numpy.stack(arrays, axis=0, out=None, *, dtype=None, casting='same_kind')`

  1. casting: Controls what kind of data casting may occur
    - Imagine you have a number that's stored as a whole number (like 5), but you need to use it in a calculation that only works with decimal numbers (like 5.0). You need to "translate" the whole number into a decimal number, which is like casting.

In [61]:
# A single (2, 3, 4) ndarray is treated as a sequence of 2 arrays of shape (3, 4).
# `axis` is the position of the new length-2 axis in the stacked result.
arrays = np.random.rand(2,3,4)
np.stack(arrays, axis=0).shape, np.stack(arrays, axis=1).shape, np.stack(arrays, axis=2).shape

((2, 3, 4), (3, 2, 4), (3, 4, 2))

In [65]:
a = np.array([1, 2, 3])
b = np.array([4, 5, 6])
np.stack((a, b))  # default axis=0: each input becomes a row of the result

array([[1, 2, 3],
       [4, 5, 6]])

In [64]:
np.stack((a, b), axis=-1)  # new axis is placed last: each input becomes a column of the result

array([[1, 4],
       [2, 5],
       [3, 6]])

In [69]:
# RNG -> random number generator. default_rng() is preferred over the older
# np.random functions because it's more flexible and has better statistical properties.
rng = np.random.default_rng()
# Generate a list of 10 random (3x4) arrays; `_` is a throwaway variable, just used to loop 10 times.
arrays = [rng.normal(size=(3,4)) for _ in range(10)]
# The new axis of length 10 is inserted at the requested position (first, then last).
np.stack(arrays, axis=0).shape , np.stack(arrays, axis=2).shape

((10, 3, 4), (3, 4, 10))

#### hstack()
- Horizontally stack arrays
- **Syntax**: `numpy.hstack(tup, *, dtype=None, casting='same_kind')`

In [41]:
a = np.array((1,2,3))
b = np.array((4,5,6))
np.hstack((a,b))  # 1-D inputs are joined end to end into one longer 1-D array

array([1, 2, 3, 4, 5, 6])

In [42]:
a = np.array([[1],[2],[3]])  # shape (3, 1)
b = np.array([[4],[5],[6]])  # shape (3, 1)
np.hstack((a,b))  # 2-D inputs are joined side by side (column-wise), giving shape (3, 2)

array([[1, 4],
       [2, 5],
       [3, 6]])

#### vstack()
- Vertically stack arrays
- **Syntax**: `numpy.vstack(tup, *, dtype=None, casting='same_kind')`

In [70]:
a = np.array([1, 2, 3])
b = np.array([4, 5, 6])
np.vstack((a,b))  # each 1-D input of length 3 becomes one row of a (2, 3) result

array([[1, 2, 3],
       [4, 5, 6]])

In [71]:
a = np.array([[1], [2], [3]])  # shape (3, 1)
b = np.array([[4], [5], [6]])  # shape (3, 1)
np.vstack((a,b))  # 2-D inputs are joined one below the other (row-wise), giving shape (6, 1)

array([[1],
       [2],
       [3],
       [4],
       [5],
       [6]])

### concatenate()
- Concatenate multiple arrays, i.e. join a sequence of arrays along an existing axis.
- **Syntax**: `numpy.concatenate((a1, a2, ...), axis=0, out=None, dtype=None, casting="same_kind")`


In [84]:
a = np.array([[1,2],[3,4]])  # shape (2, 2)
b = np.array([[1,2]])        # shape (1, 2)
np.concatenate((a, b), axis=0)  # b is appended as a new row

array([[1, 2],
       [3, 4],
       [1, 2]])

In [90]:
np.concatenate((a, b.T), axis=1)  # b.T has shape (2, 1), so it is appended as a new column

array([[1, 2, 1],
       [3, 4, 2]])

In [88]:
np.concatenate((a, b), axis=None)  # axis=None flattens every input before joining them

array([1, 2, 3, 4, 1, 2])

### split()
- Split an array into subarrays
- **Syntax**: `numpy.split(ary, indices_or_sections, axis=0)`
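  - If *indices_or_sections* is an integer, N, the array will be divided into N equal arrays along axis. If such a split is not possible, an error is raised.
  - If *indices_or_sections* is a 1-D array (or list) of sorted integers, the entries are the indices along axis at which the array is split; e.g. `[3, 4, 5]` yields `ary[:3]`, `ary[3:4]`, `ary[4:5]` and `ary[5:]`.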

In [95]:
x = np.arange(9.0) # -> [0., 1., 2., 3., 4., 5., 6., 7., 8.]
np.split(x, 3)  # integer argument: split into 3 equal-sized sub-arrays

[array([0., 1., 2.]), array([3., 4., 5.]), array([6., 7., 8.])]

In [98]:
x = np.arange(8.0) # -> [0., 1., 2., 3., 4., 5., 6., 7.]
np.split(x, [3,4,5])  # list argument: split points, giving x[:3], x[3:4], x[4:5] and x[5:]

[array([0., 1., 2.]), array([3.]), array([4.]), array([5., 6., 7.])]

### References
- [numpy](https://numpy.org/doc/stable/reference/routines.array-manipulation.html)