In [2]:
#https://numpy.org/doc/1.23/user/basics.creation.html

import exchelp.exception_helper  as eh
import numpy as np

There are 6 general mechanisms for creating arrays:

   * Conversion from other Python structures (i.e. lists and tuples)

   * Intrinsic NumPy array creation functions (e.g. arange, ones, zeros, etc.)

   * Replicating, joining, or mutating existing arrays

   * Reading arrays from disk, either from standard or custom formats

   * Creating arrays from raw bytes through the use of strings or buffers

   * Use of special library functions (e.g., random)

You can use these methods to create ndarrays or Structured arrays. This document will cover general methods for ndarray creation.

## Initialize from python sequences

In [3]:
# The nesting depth of the Python sequence determines the array's dimensions.
a1d = np.array([1, 2, 3, 4])          # flat list         -> shape (4,)
a2d2 = np.array([[1, 2, 3, 4]])       # one nested row    -> shape (1, 4)
a2d = np.array([[1, 2], [3, 4]])      # two nested rows   -> shape (2, 2)
for _arr in (a1d, a2d2, a2d):
    print(_arr, "shape:", _arr.shape, "size:", _arr.size)

[1 2 3 4] shape: (4,) size: 4
[[1 2 3 4]] shape: (1, 4) size: 4
[[1 2]
 [3 4]] shape: (2, 2) size: 4


In [4]:
# A 3x3 grid whose entries are length-2 vectors -> shape (3, 3, 2).
# Each line of the literal below is one row of the grid.
a3d = np.array([
    [[1, 2], [2, 1], [2, 2]],
    [[3, 2], [2, 3], [3, 3]],
    [[4, 3], [3, 4], [4, 4]],
])

print(a3d, "shape:", a3d.shape, "size:", a3d.size)

[[[1 2]
  [2 1]
  [2 2]]

 [[3 2]
  [2 3]
  [3 3]]

 [[4 3]
  [3 4]
  [4 4]]] shape: (3, 3, 2) size: 18


In [5]:
# A 2x2 matrix whose elements are themselves 2x2 matrices -> shape (2, 2, 2, 2).
# The inner matrices are written as nested lists; np.array stacks them the
# same way it stacks nested ndarrays.
a4d = np.array([
    [[[1, 2], [3, 4]], [[4, 3], [2, 1]]],
    [[[5, 6], [7, 8]], [[7, 8], [5, 6]]],
])

print(a4d, "shape:", a4d.shape, "size:", a4d.size)

[[[[1 2]
   [3 4]]

  [[4 3]
   [2 1]]]


 [[[5 6]
   [7 8]]

  [[7 8]
   [5 6]]]] shape: (2, 2, 2, 2) size: 16


### Defining element datatype

In [6]:
#
# Wrong data type usage: values that do not fit the requested dtype
#
# 128 and 129 lie outside the int8 range [-128, 127]. Handing them directly to
# np.array(..., dtype=np.int8) depends on out-of-bound integer conversion,
# which newer NumPy releases deprecate and then reject with OverflowError.
# Casting an already-built array with astype wraps around on every version,
# which is the behaviour this cell wants to show.
a = np.array([127, 128, 129]).astype(np.int8)
print(a,'elementTypeInt8',eh.check_type(a[0],np.int8,typecheckmode=eh.TypeCheckMode.SUBTYPE))

# int16 is wide enough, so the same values are stored unchanged.
a = np.array([127, 128, 129], dtype=np.int16)
print(a,'elementTypeInt16',eh.check_type(a[0],np.int16,typecheckmode=eh.TypeCheckMode.SUBTYPE))

[ 127 -128 -127] elementTypeInt8 True
[127 128 129] elementTypeInt16 True


In [7]:
#
# Wrong data type usage: subtracting unsigned integers
#
a = np.array([2, 3, 4], dtype=np.uint32)
b = np.array([5, 6, 7], dtype=np.uint32)
print(a, b)

# uint32 - uint32 stays uint32, so the negative differences wrap around.
c_dif = np.subtract(a, b)
print('both as uint:', c_dif, 'result_type:', c_dif.dtype)

# With both operands signed the differences come out as expected.
a = a.astype(np.int32)
b = b.astype(np.int32)
c_dif = np.subtract(a, b)
print('both as int32:', c_dif, 'result_type:', c_dif.dtype)

# Mixing uint32 with int32 promotes the result to int64, which can hold
# every value of both input types.
a = a.astype(np.uint32)
b = b.astype(np.uint32)
b_signed = b.astype(np.int32)
c_dif = a - b_signed
del b_signed
print('one as uint32 other is int32:', c_dif, 'result_type:', c_dif.dtype)


[2 3 4] [5 6 7]
both as uint: [4294967293 4294967293 4294967293] result_type: uint32
both as int32: [-3 -3 -3] result_type: int32
one as uint32 other is int32: [-3 -3 -3] result_type: int64


## Shorthand 1D array initializations

### arange

numpy.arange creates arrays with regularly incrementing values. 

In [8]:
# Only a stop value: integers counted up from 0.
a = np.arange(10)
print('1d:', a)
# start is inclusive, stop is exclusive
a = np.arange(2, 12, dtype=float)
print('1d:', a, "type:", a.dtype)
# A fractional step produces a float array.
a = np.arange(2, 3, step=0.1)
print('1d:', a, "type:", a.dtype)

1d: [0 1 2 3 4 5 6 7 8 9]
1d: [ 2.  3.  4.  5.  6.  7.  8.  9. 10. 11.] type: float64
1d: [2.  2.1 2.2 2.3 2.4 2.5 2.6 2.7 2.8 2.9] type: float64


### linspace

numpy.linspace will create arrays with a specified number of elements, and spaced equally between the specified beginning and end values.
 
The number of elements and both the start and end points are guaranteed.

In [9]:
# 12 evenly spaced samples; both end points (1.0 and 4.0) are part of the result.
a = np.linspace(1., 4., num=12)
print(a, a.shape)

[1.         1.27272727 1.54545455 1.81818182 2.09090909 2.36363636
 2.63636364 2.90909091 3.18181818 3.45454545 3.72727273 4.        ] (12,)


## Shorthand 2D array initialization

### eye, diag, vander

The 2D array creation functions e.g. numpy.eye, numpy.diag, and numpy.vander define properties of special matrices represented as 2D arrays.

In [10]:
# Square identity matrix (n x n)
idmx = np.eye(3)
print(idmx, 'shape:', idmx.shape)

# Rectangular n x m matrix with ones on the main diagonal
idmx = np.eye(3, M=6)
print(idmx, 'shape:', idmx.shape)

[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]] shape: (3, 3)
[[1. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0.]] shape: (3, 6)


In [11]:
#
# Diagonal matrix
#
# 1-D input: the values are placed on the main diagonal.
dia = np.diag([1, 2, 3])
print(dia, 'shape:', dia.shape)
# k=2 moves the values two diagonals above the main one; the matrix grows to 5x5.
dia = np.diag([1, 2, 3], k=2)
print(dia, 'shape:', dia.shape)
# 2-D input: diag extracts the main diagonal instead of building a matrix.
a = np.arange(1, 10).reshape(3, 3)
print(a, 'diagonals:', np.diag(a))

[[1 0 0]
 [0 2 0]
 [0 0 3]] shape: (3, 3)
[[0 0 1 0 0]
 [0 0 0 2 0]
 [0 0 0 0 3]
 [0 0 0 0 0]
 [0 0 0 0 0]] shape: (5, 5)
[[1 2 3]
 [4 5 6]
 [7 8 9]] diagonals: [1 5 9]


### Vandermonde matrix

#### One common application: polynomial fit

Suppose I have a set of points, say $(1,2)$, $(5,3)$, $(-2,0)$, $(7,-1)$. Let's find a polynomial $p(x)$ that passes through these points. So we have

$$p(1)=2,\quad p(5)=3,\quad p(-2)=0,\quad p(7)=-1.$$

We can use a cubic polynomial $p(x)=ax^3+bx^2+cx+d$<br>
so that we have four equations and four unknowns. Hence

$p(1)=2$ means $a(1)^3+b(1)^2+c(1)+d=2$ <br>
$p(5)=3$ means $a(5)^3+b(5)^2+c(5)+d=3$<br>
$p(-2)=0$ means $a(-2)^3+b(-2)^2+c(-2)+d=0$<br>
$p(7)=-1$ means $a(7)^3+b(7)^2+c(7)+d=-1$<br>

So we have

$$
\begin{bmatrix}
1^0 & 1^1 & 1^2 & 1^3 \\
5^0 & 5^1 & 5^2 & 5^3 \\
(-2)^0 & (-2)^1 & (-2)^2 & (-2)^3 \\
7^0 & 7^1 & 7^2 & 7^3
\end{bmatrix}
\begin{bmatrix}
d \\
c \\
b \\
a
\end{bmatrix}
=
\begin{bmatrix}
2 \\
3 \\
0 \\
-1
\end{bmatrix}
$$

So a Vandermonde matrix is used naturally to find a polynomial passing through known points. <br>

#### Another application: Vandermonde-like matrices in Digital Signal Processing
A Vandermonde-like matrix appears in Digital Signal Processing in the computation of the DFT (Discrete Fourier Transform) and the IDFT (Inverse Discrete Fourier Transform). <br>
In short, the DFT is used to convert equi-spaced samples of a function in the time domain to the frequency domain. The IDFT does the exact opposite.


### vander(x, n) defines a Vandermonde matrix as a 2D NumPy array.

In [12]:
# Up to degree 2: the columns hold x**2, x**1 and x**0 for each sample point.
lspace = np.linspace(0, 2, num=5)
a = np.vander(lspace, N=3)
print(lspace, lspace.shape, '\n', a, a.shape)

[0.  0.5 1.  1.5 2. ] (5,) 
 [[0.   0.   1.  ]
 [0.25 0.5  1.  ]
 [1.   1.   1.  ]
 [2.25 1.5  1.  ]
 [4.   2.   1.  ]] (5, 3)


In [13]:
# Up to degree 1: one column of x values followed by a column of ones.
a = np.vander([1, 2, 3, 4], N=2)
print(a, a.shape)

[[1 1]
 [2 1]
 [3 1]
 [4 1]] (4, 2)


In [14]:
# Up to degree 3, first from a list and then from a tuple:
# both sequence types produce the same matrix.
for _points in ([1, 2, 3, 4], (1, 2, 3, 4)):
    a = np.vander(_points, 4)
    print(a, a.shape)

[[ 1  1  1  1]
 [ 8  4  2  1]
 [27  9  3  1]
 [64 16  4  1]] (4, 4)
[[ 1  1  1  1]
 [ 8  4  2  1]
 [27  9  3  1]
 [64 16  4  1]] (4, 4)


## Shorthand ND array initializations

### np.zeros
will create an array filled with 0 values with the specified shape. The default dtype is float64:

In [15]:
# The shape tuple sets the dimensionality: first a 2-D, then a 3-D array of zeros.
for _shape in ((2, 3), (2, 3, 4)):
    a = np.zeros(_shape)
    print(a, a.shape, a.dtype)

[[0. 0. 0.]
 [0. 0. 0.]] (2, 3) float64
[[[0. 0. 0. 0.]
  [0. 0. 0. 0.]
  [0. 0. 0. 0.]]

 [[0. 0. 0. 0.]
  [0. 0. 0. 0.]
  [0. 0. 0. 0.]]] (2, 3, 4) float64


### np.ones
will create an array filled with 1 values. It is identical to zeros in all other respects. The default dtype is float64:

In [16]:
# Same call pattern as np.zeros: first a 2-D, then a 3-D array filled with ones.
for _shape in ((2, 3), (2, 3, 4)):
    a = np.ones(_shape)
    print(a, a.shape, a.dtype)

[[1. 1. 1.]
 [1. 1. 1.]] (2, 3) float64
[[[1. 1. 1. 1.]
  [1. 1. 1. 1.]
  [1. 1. 1. 1.]]

 [[1. 1. 1. 1.]
  [1. 1. 1. 1.]
  [1. 1. 1. 1.]]] (2, 3, 4) float64


### numpy.random.default_rng

The random method of the result of default_rng will create an array filled with random values between 0 and 1. It is included with the numpy.random library. 


In [17]:
from numpy.random import default_rng

# Unseeded generator: the values differ on every run.
a = default_rng().random(size=(2, 3))
print(a, a.shape, a.dtype)

# Passing a seed makes the draw reproducible.
seed = 42
a = default_rng(seed).random(size=(2, 3, 2))
print(a, a.shape, a.dtype)

[[0.01155961 0.15255782 0.03963661]
 [0.46571509 0.48474455 0.23006173]] (2, 3) float64
[[[0.77395605 0.43887844]
  [0.85859792 0.69736803]
  [0.09417735 0.97562235]]

 [[0.7611397  0.78606431]
  [0.12811363 0.45038594]
  [0.37079802 0.92676499]]] (2, 3, 2) float64


### numpy.indices
will create a set of arrays (stacked as a one-higher dimensioned array), one per dimension with each representing variation in that dimension.
This is particularly useful for evaluating functions of multiple dimensions on a regular grid.


In [18]:
# For a (2, 3) grid the result stacks two index grids: a[0] holds the row
# index of every cell and a[1] holds the column index.
a = np.indices((2, 3))
print(a, a.shape)
i = a[0]
j = a[1]
print('rowindices:', i, i.shape)
print('colindices:', j, j.shape)



[[[0 0 0]
  [1 1 1]]

 [[0 1 2]
  [0 1 2]]] (2, 2, 3)
rowindices: [[0 0 0]
 [1 1 1]] (2, 3)
colindices: [[0 1 2]
 [0 1 2]] (2, 3)


In [19]:
# Example: build the matrix M whose (i, j)-th element is
#   M_ij = 2*i + 3*j
# np.indices provides the row-index grid i and the column-index grid j,
# so the formula can be written down directly on whole arrays.
i, j = np.indices((2, 3))
M = i * 2 + j * 3
# Leave M as the last expression so the notebook displays it.
M

array([[0, 3, 6],
       [2, 5, 8]])

## Shorthand 2D array initializations

## Replicating, joining, or mutating existing arrays

### numpy.copy 

In [20]:
a = np.array([3, 1, 2, 4, 6, 9])
b = a[:2]   # a basic slice is a view onto a's data
b += 1      # so the in-place addition also changes a
print(a, b)

# Instead: copy the slice first so that it owns its data.
a = np.array([3, 1, 2, 4, 6, 9])
b = np.copy(a[:2])
b += 1
print(a, b)  # the original array is not affected




[4 2 2 4 6 9] [4 2]
[3 1 2 4 6 9] [4 2]


###  numpy.vstack, numpy.hstack, and numpy.block.
 join existing arrays

 #### np.block (arrs)

In [21]:
# Four 2x2 tiles, assembled by np.block into one 4x4 matrix:
#   [[A, B],
#    [C, D]]
A = np.ones((2, 2))
B = np.eye(2)
C = np.zeros((2, 2))
D = np.diag([-3, -4])
tiles = [[A, B], [C, D]]
E = np.block(tiles)
del tiles
print(E, E.shape)


[[ 1.  1.  1.  0.]
 [ 1.  1.  0.  1.]
 [ 0.  0. -3.  0.]
 [ 0.  0.  0. -4.]] (4, 4)


#### np.vstack (tup)

Stack arrays in sequence vertically (row wise).

In [22]:
# Two 1-D arrays of length 3 become the two rows of a (2, 3) array.
a = np.array([1, 2, 3])
b = np.array([4, 5, 6])
vs = np.vstack([a, b])

print(vs, vs.shape)

[[1 2 3]
 [4 5 6]] (2, 3)


In [23]:
# Two (3, 1) column vectors stacked vertically give one (6, 1) column.
a = np.array([[1], [2], [3]])

print(a.shape)

b = np.array([[4], [5], [6]])
vs = np.vstack([a, b])
print(vs, vs.shape)



(3, 1)
[[1]
 [2]
 [3]
 [4]
 [5]
 [6]] (6, 1)


#### np.hstack (tup)

Stack arrays in sequence horizontally (column wise).

In [24]:
# Two 1-D arrays are joined end to end into a single 1-D array of length 6.
a = np.array([1, 2, 3])
b = np.array([4, 5, 6])
vs = np.hstack([a, b])

print(vs, vs.shape)

[1 2 3 4 5 6] (6,)


In [25]:
# Two (3, 1) column vectors placed side by side give a (3, 2) array.
a = np.array([[1], [2], [3]])
b = np.array([[4], [5], [6]])
vs = np.hstack([a, b])
print(vs, vs.shape)

[[1 4]
 [2 5]
 [3 6]] (3, 2)


## Shorthand ND array initializations from Disk

#### numpy.loadtxt (no missing values)

In [26]:
# Create a dummy CSV: a 3x3 integer grid holding 0..8, written with savetxt.
a = np.arange(9).reshape(3, 3)
np.savetxt('test-array.csv', a, delimiter=',')

# Read the file back; a now refers to the array loaded from disk.
a = np.loadtxt('test-array.csv', delimiter=',')
print(a, a.shape)

#### numpy.genfromtxt (with missing values)

In [30]:
# With missing values: genfromtxt fills the blank cell with nan.
# Create a dummy CSV whose middle cell is blank.
arr = '1,2,3\n4, ,6\n7,8,9'
# The context manager closes the file even if the write fails, so the handle
# is never leaked and the data is on disk before genfromtxt reads it back.
with open('test-array.csv', 'w') as f:
    f.write(arr)

a = np.genfromtxt('test-array.csv', delimiter=',')
print(a, a.shape)


[[ 1.  2.  3.]
 [ 4. nan  6.]
 [ 7.  8.  9.]] (3, 3)
