### `np.where` for an efficient if/else

In [1]:
import numpy as np

In [2]:
# Make a random number generator (no seed is passed, so the sampled values differ on every run)
generator = np.random.default_rng()

In [3]:
# Sample 2M (2*10**6) integers between 0 and 5 inclusive (the upper bound 6 is exclusive)
foo = generator.integers(6, size= 2*10**6)
print(foo)

[0 1 0 ... 5 3 3]


In [4]:
# Sample another 2M (2*10**6) integers between 0 and 5 inclusive (the upper bound 6 is exclusive)
bar = generator.integers(6, size= 2*10**6)
print(bar)

[2 4 2 ... 3 0 5]


Q. For every element in bar: if its value is even, double the corresponding element of foo; if it is odd, halve the corresponding element of foo. Assign the results to baz.


In [7]:
# -- first inefficient method using loop

%%timeit # this is used to see the running time of code in notebook only

baz = np.zeros(foo.size) # msaking an arry with same dimensional as baz but with default value as 0
for i in range(foo.size):
  if bar[i] % 2 == 0:
    baz[i] = foo[i] * 2
  else:
    baz[i] = foo[i] / 2

print(baz)

UsageError: Line magic function `%%timeit` not found.


In [9]:
# efficient version using np.where: one vectorised pass instead of a Python loop
bar_is_even = bar % 2 == 0  # boolean mask, True where bar holds an even value
boz = np.where(bar_is_even, foo * 2, foo / 2)  # doubled foo where the mask is True, halved foo elsewhere
print(boz)

[0.  2.  0.  ... 2.5 6.  1.5]


In [11]:
# confirm the loop result (baz) and the np.where result (boz) have the same shape and elements
np.array_equal(baz, boz)

True

### Math Functions

In [2]:
import numpy as np

In [3]:
# small 3x3 matrix of integers used by the sum examples below
squee = np.array([[5, 2, 9], [1, 0, 2], [1, 7, 8]])

In [5]:
# sum of all array elements (no axis is given, so the whole array is reduced to one number)
np.sum(squee)

35

In [6]:
# calculate column sums (axis = 0 adds down the rows, giving one total per column)
np.sum(squee, axis = 0)

array([ 7,  9, 19])

In [7]:
# calculate row sums (axis = 1 adds across the columns, giving one total per row)
np.sum(squee, axis = 1)
# note: the result is a 1d array (basically a single flat array)

array([16,  3, 16])

In [9]:
# keepdims = True keeps the summed axis as a length-1 dimension, so the result stays 2d
np.sum(squee, axis = 1, keepdims = True)
# now the result is a 2d array with one column (basically an array inside an array)

array([[16],
       [ 3],
       [16]])

In [10]:
# squee currently has an integer dtype, and NaN is a floating-point value, so
# this assignment raises ValueError. The error is the point of the demo, so it
# is caught and printed; that way the notebook still runs from top to bottom.
try:
    squee[0, 0] = np.nan
except ValueError as err:
    print(f"ValueError: {err}")
else:
    # only reached if squee already has a float dtype and can hold NaN
    print(squee)

ValueError: cannot convert float NaN to integer

The error you're encountering occurs because you are trying to assign a NumPy NaN (Not a Number) value to an element in an integer array. NumPy arrays have a fixed data type, and if you try to assign a value that cannot be cast to the array's data type, you'll get an error.

In your case, the array squee is of integer type, and you're attempting to assign np.nan, which is a floating-point value. To resolve this issue, you can change the data type of the array to a floating-point type that supports NaN values, such as `np.float64`.

In [13]:
# rebuild squee with an explicit float64 dtype so that it can store NaN
squee_rows = [[5, 2, 9], [1, 0, 2], [1, 7, 8]]
squee = np.array(squee_rows, dtype=np.float64)

print(squee)

[[5. 2. 9.]
 [1. 0. 2.]
 [1. 7. 8.]]


In [12]:
# alternatively, write the values as float literals and let NumPy infer a float dtype
squee = np.array([[5.0, 2.0, 9.0], [1.0, 0.0, 2.0], [1.0, 7.0, 8.0]])

print(squee)

[[5. 2. 9.]
 [1. 0. 2.]
 [1. 7. 8.]]


In [15]:
# put a NaN into the float array (this modifies squee in place); any sum that includes it will be NaN
squee[0, 0] = np.nan
print(squee)

[[nan  2.  9.]
 [ 1.  0.  2.]
 [ 1.  7.  8.]]


In [16]:
# the sum of an array that contains a NaN element is itself NaN
np.sum(squee)

nan

In [17]:
# calculate the sum while excluding the NaN elements: `where` takes a boolean mask of the elements to include
np.sum(squee, where = ~np.isnan(squee))

30.0

In [18]:
# calculate the sum after replacing the NaN values with 0
np.sum(np.nan_to_num(squee))

30.0

In [19]:
# or use nansum, which treats NaN as 0 when calculating the sum
np.nansum(squee)

30.0

### Check if any value is NaN

In [20]:
import numpy as np

In [21]:
# 3x3 float array: first row has one NaN, second row is all NaN, third row has none
foo = np.array(
    [
        [np.nan, 2.0, 9.0],
        [np.nan] * 3,
        [1.0, 7.0, 8.0],
    ]
)

In [22]:
# element-wise test: True wherever foo holds NaN
np.isnan(foo)

array([[ True, False, False],
       [ True,  True,  True],
       [False, False, False]])

In [25]:
# check row by row: axis = 1 reduces across the columns, so each entry says whether that row contains any NaN
np.any(np.isnan(foo), axis = 1)

array([ True,  True, False])

In [26]:
# boolean mask that is True for every row containing at least one NaN
mask = np.any(np.isnan(foo), axis = 1)

In [27]:
# boolean indexing: keep only the rows for which mask is True
foo[mask]

array([[nan,  2.,  9.],
       [nan, nan, nan]])

In [30]:
# boolean mask that is True only for rows in which every value is NaN
mask = np.all(np.isnan(foo), axis = 1)

In [31]:
# boolean indexing again: keep only the rows for which the current mask is True
foo[mask]

array([[nan, nan, nan]])

### concatenate()

In [32]:
import numpy as np

In [33]:
# array of zeros with 3 rows and 2 columns
roux = np.zeros((3, 2))
print(roux)

[[0. 0.]
 [0. 0.]
 [0. 0.]]


In [34]:
# array of ones with 2 rows and 2 columns
gumbo = np.ones((2, 2))
print(gumbo)

[[1. 1.]
 [1. 1.]]


In [35]:
np.concatenate((roux,roux,roux), axis = 0) # axis = 0 stacks the arrays vertically: rows are appended, the number of columns stays the same

array([[0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.]])

In [36]:
np.concatenate((roux,roux,roux), axis = 1) # axis = 1 joins the arrays side by side: columns are appended, the number of rows stays the same

array([[0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.]])

In [37]:
# when concatenating, the arrays must have the same shape, excluding the axis along which you are concatenating
np.concatenate((roux,gumbo), axis = 0) # both arrays have 2 columns, so stacking them along axis 0 is possible

array([[0., 0.],
       [0., 0.],
       [0., 0.],
       [1., 1.],
       [1., 1.]])

In [38]:
# roux has 3 rows and gumbo has 2, so they cannot be joined side by side along
# axis 1. The ValueError is the point of the demo, so it is caught and printed;
# that way the notebook still runs from top to bottom.
try:
    np.concatenate((roux,gumbo), axis = 1)
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: all the input array dimensions except for the concatenation axis must match exactly, but along dimension 0, the array at index 0 has size 3 and the array at index 1 has size 2