# Numpy array manipulations

In [1]:
import numpy as np

## Creating numpy array from a list

In [2]:
A = np.array([[1, 2], [3, 4]])
print(A)
print(type(A))
print(A.dtype)

[[1 2]
 [3 4]]
<class 'numpy.ndarray'>
int64


## Accessing entries (basic)

In [70]:
A = np.array([[1, 2], [3, 4]])
print(A)
print('Single entry {}'.format(A[0, 1]))
print('Entire row {}'.format((A[1])))
print(type(A[1]))
print(type(A[0][0]))
print(type(A[0, 0]))
print('Entire column {}'.format(A[:, 1]))
print('Last column {}'.format(A[:, -1]))

[[1 2]
 [3 4]]
Single entry 2
Entire row [3 4]
<class 'numpy.ndarray'>
<class 'numpy.int64'>
<class 'numpy.int64'>
Entire column [2 4]
Last column [2 4]


## Changing the value of some entries (basic)

In [33]:
A = np.array([[1, 2], [3, 4]])
print(A)
print('Change single entry')
A[0, 1] = 7
print(A)
print('Change entire row')
A[1] = [10, 11]
print(A)
print('Change entire column')
A[:, 0] = [-3, -4]
print(A)

[[1 2]
 [3 4]]
Change single entry
[[1 7]
 [3 4]]
Change entire row
[[ 1  7]
 [10 11]]
Change entire column
[[-3  7]
 [-4 11]]


## Copying arrays

The following line shows that this doesn't make a real copy of the array A, but assign a second "name" to the array stored in the memory having so far only the name A. So changing B changes A.

In [47]:
A = np.array([[1, 2], [3, 4]])
print('id of A = {}'.format(id(A)))
B = A
print('id of B = {}'.format(id(B)))

B[0, 0] = 666
print(B)
print(A)

id of A = 139929565989344
id of B = 139929565989344
[[666   2]
 [  3   4]]
[[666   2]
 [  3   4]]


In order to make a real copy, one has to use:

In [48]:
A = np.array([[1, 2], [3, 4]])
print('id of A = {}'.format(id(A)))
B = A.copy()
print('id of B = {}'.format(id(B)))
B[0, 0] = 666
print(B)
print(A)

id of A = 139929565990064
id of B = 139929565990704
[[666   2]
 [  3   4]]
[[1 2]
 [3 4]]


## Type of the entries (print it and change it)

In [105]:
A = np.arange(6).reshape(2, 3)
print(type(A))
print(A.dtype)

<class 'numpy.ndarray'>
int64


To copy an array, cast to a specified type:

In [107]:
B = A.astype('float64')
C = A.astype(np.float64)
D = C.astype('int32')
print(B.dtype)
print(B.dtype==C.dtype)
print(A)
B[0, 0] = 32
print(A)
print(D)
print(D.dtype)

float64
True
[[0 1 2]
 [3 4 5]]
[[0 1 2]
 [3 4 5]]
[[0 1 2]
 [3 4 5]]
int32


I don't really understand how nump.ndarray.view works. It seems to change the size of the array, or the values of the enries.

In [59]:
A = np.arange(6, dtype='float32').reshape(2, 3)
print(A)
print(id(A))
print(A.dtype)
B = A.view('int32')
print(B)
print(B.dtype)
print(id(B))
print(A.dtype)
print(A)
B[:] = A
print(B)
print(B.dtype)
print(id(B))
print(A.dtype)
print(A)

[[0. 1. 2.]
 [3. 4. 5.]]
139929565997696
float32
[[         0 1065353216 1073741824]
 [1077936128 1082130432 1084227584]]
int32
139929565996496
float32
[[0. 1. 2.]
 [3. 4. 5.]]
[[0 1 2]
 [3 4 5]]
int32
139929565996496
float32
[[0.e+00 1.e-45 3.e-45]
 [4.e-45 6.e-45 7.e-45]]


## Shape and dimensions

In [82]:
A = np.arange(6).reshape(2, 3)
print(A.shape)
print(A.ndim)
print(A)
A.reshape(3, 2)
print(A)
A.shape = (3, 2)
print(A)

(2, 3)
2
[[0 1 2]
 [3 4 5]]
[[0 1 2]
 [3 4 5]]
[[0 1]
 [2 3]
 [4 5]]


In [80]:
b = np.array([[(1,2), (3,4)], [(5,6), (7,8)]])
print(b)
print(b[0])
print(b[0][1])
print(b[0][1][0])
print(b.shape)

[[[1 2]
  [3 4]]

 [[5 6]
  [7 8]]]
[[1 2]
 [3 4]]
[3 4]
3
(2, 2, 2)


## Concatenating arrays

In [72]:
A = np.arange(6, dtype='int32').reshape(2, 3)
B = np.array([[3, 1, 4], [1, 5 ,9]])
C = np.concatenate((A, B), axis=1)
D = np.concatenate((A, B), axis=0)
print(C)
print(D)

[[0 1 2 3 1 4]
 [3 4 5 1 5 9]]
[[0 1 2]
 [3 4 5]
 [3 1 4]
 [1 5 9]]


There is also another way using np.r_ but it more complicated. Cf https://docs.scipy.org/doc/numpy/reference/generated/numpy.r_.html

## Various transforms

Transpose

In [73]:
A = np.arange(6, dtype='int32').reshape(2, 3)
print(A)
print(A.T)

[[0 1 2]
 [3 4 5]]
[[0 3]
 [1 4]
 [2 5]]


## Creating random arrays

In [75]:
from numpy import random

In [77]:
A = np.random.permutation(10)
print(A)

[3 8 2 7 1 0 9 6 4 5]


In [78]:
y = np.array([1,2,3.1])
order = np.random.permutation(3)
y = y[order]

## Finding indices

In [83]:
A = np.arange(6).reshape(2, 3)

In [89]:
A[0, 0] = 3
print(A)
ind = np.where(A==3)
print(ind)
print(type(ind))
print(type(ind[0]))
print(A[ind])
print(type(A[ind]))

[[3 1 2]
 [3 4 5]]
(array([0, 1]), array([0, 0]))
<class 'tuple'>
<class 'numpy.ndarray'>
[3 3]
<class 'numpy.ndarray'>


## Removing slices

In [95]:
A = np.arange(24).reshape(2, 4, 3)
print(A)
B = np.delete(A, obj=[-1,], axis=0)
print(B)
C = np.delete(A, obj=[0, 2], axis=1)
print(C)

[[[ 0  1  2]
  [ 3  4  5]
  [ 6  7  8]
  [ 9 10 11]]

 [[12 13 14]
  [15 16 17]
  [18 19 20]
  [21 22 23]]]
[[[ 0  1  2]
  [ 3  4  5]
  [ 6  7  8]
  [ 9 10 11]]

 [[12 13 14]
  [15 16 17]
  [18 19 20]
  [21 22 23]]]
[[[ 3  4  5]
  [ 9 10 11]]

 [[15 16 17]
  [21 22 23]]]


  This is separate from the ipykernel package so we can avoid doing imports until


## Save and restore numpy arrays

In [99]:
A = np.arange(6).reshape(2, 3)
print(id(A))
np.save(file='A', arr=A)

139929565217456


In [100]:
B = np.load('A.npy')
print(B)
print(id(B))

[[0 1 2]
 [3 4 5]]
139929565227168


## Finding unique values

In [101]:
A = np.array([['a', 'b'], ['a', 'c']])

In [103]:
np.unique(A)

array(['a', 'b', 'c'], dtype='<U1')

## Shuffling rows of an array

If one wants to shuffle a single array.

In [5]:
A = np.arange(30).reshape(10, 3)

In [6]:
print(A)
np.random.shuffle(A)
print(A)

[[ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]
 [12 13 14]
 [15 16 17]
 [18 19 20]
 [21 22 23]
 [24 25 26]
 [27 28 29]]
[[ 9 10 11]
 [21 22 23]
 [27 28 29]
 [ 0  1  2]
 [ 6  7  8]
 [12 13 14]
 [15 16 17]
 [18 19 20]
 [ 3  4  5]
 [24 25 26]]


If one wants to shuffle two arrays (typically the features and the responses) with the same permutation:

In [117]:
X = np.arange(21).reshape(7, -1)
y = 10*np.arange(7)

In [119]:
print(X)
print(y)

[[ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]
 [12 13 14]
 [15 16 17]
 [18 19 20]]
[ 0 10 20 30 40 50 60]


In [121]:
perm = np.random.permutation(len(y))
X_shuff = X[perm]
y_shuff = y[perm]

In [122]:
print(X_shuff)
print(y_shuff)

[[18 19 20]
 [ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [15 16 17]
 [ 9 10 11]
 [12 13 14]]
[60  0 10 20 50 30 40]
