# np.where vs. pandas where (Series.where / DataFrame.where)

## np.where

In [45]:
import numpy as np
import pandas as pd

In [46]:

np.where?

[1;31mDocstring:[0m
where(condition, [x, y], /)

Return elements chosen from `x` or `y` depending on `condition`.

.. note::
    When only `condition` is provided, this function is a shorthand for
    ``np.asarray(condition).nonzero()``. Using `nonzero` directly should be
    preferred, as it behaves correctly for subclasses. The rest of this
    documentation covers only the case where all three arguments are
    provided.

Parameters
----------
condition : array_like, bool
    Where True, yield `x`, otherwise yield `y`.
x, y : array_like
    Values from which to choose. `x`, `y` and `condition` need to be
    broadcastable to some shape.

Returns
-------
out : ndarray
    An array with elements from `x` where `condition` is True, and elements
    from `y` elsewhere.

See Also
--------
choose
nonzero : The function that is called when x and y are omitted

Notes
-----
If all the arrays are 1-D, `where` is equivalent to::

    [xv if c else yv
     for c, xv, yv in zip(condition, x, y)]

numpy.where(condition, [x, y, ]/)

If all the arrays are 1-D, `where` is equivalent to::

    [xv if c else yv
     for c, xv, yv in zip(condition, x, y)]

In [66]:
# Ten consecutive integers, 10 through 19 (the stop value 20 is excluded).
a = np.arange(10, 20)
a

array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19])

In [68]:
# Condition-only form: returns a one-element tuple holding the positions
# (not the values) at which a > 13.
np.where(a > 13)

(array([4, 5, 6, 7, 8, 9], dtype=int64),)

In [69]:
# Three-argument form: keep a where a < 5, otherwise use 10*a.
# No element of a is below 5, so every entry of the result comes from 10*a.
np.where(a < 5, a, 10*a)

array([100, 110, 120, 130, 140, 150, 160, 170, 180, 190])

In [9]:
# Element-wise selection on 2-D inputs: take from the second argument where
# the mask is True and from the third argument where it is False.
np.where([[True, False], [True, True]],
         [[1, 2], [3, 4]],
         [[9, 8], [7, 6]])

array([[1, 8],
       [3, 4]])

In [10]:
# Same mask and first choice, but the fallback has shape (1, 2); it is
# broadcast across both rows, so the single False position picks up 5.
np.where([[True, False], [True, True]],
         [[1, 2], [3, 4]],
         [[5, 5]])

array([[1, 5],
       [3, 4]])

In [11]:
# Apply the docstring's 1-D recipe to nested (2-D) lists. zip yields whole
# rows here, and a non-empty list is always truthy, so every row is taken
# from the first choice list -- this is NOT the element-wise np.where result.
c = [
    x_row if cond_row else y_row
    for cond_row, x_row, y_row in zip(
        [[True, False], [True, True]],
        [[1, 2], [3, 4]],
        [[9, 8], [7, 6]],
    )
]
c

[[1, 2], [3, 4]]

In [12]:
# Show what zip pairs up for nested lists: whole rows, not individual elements.
list(zip([[True, False], [True, True]], [[1, 2], [3, 4]], [[9, 8], [7, 6]]))

[([True, False], [1, 2], [9, 8]), ([True, True], [3, 4], [7, 6])]

In [50]:
# np.ogrid[:3, :4] returns two open-grid arrays; x is the (3, 1) column 0..2.
x, y = np.ogrid[:3, :4]
x

array([[0],
       [1],
       [2]])

In [16]:
# y is the matching (1, 4) row 0..3 produced by the same np.ogrid call.
y

array([[0, 1, 2, 3]])

In [17]:
np.ogrid?

[1;31mType:[0m        OGridClass
[1;31mString form:[0m <numpy.lib.index_tricks.OGridClass object at 0x000002183115B8B0>
[1;31mFile:[0m        c:\programdata\anaconda3\lib\site-packages\numpy\lib\index_tricks.py
[1;31mDocstring:[0m  
`nd_grid` instance which returns an open multi-dimensional "meshgrid".

An instance of `numpy.lib.index_tricks.nd_grid` which returns an open
(i.e. not fleshed out) mesh-grid when indexed, so that only one dimension
of each returned array is greater than 1.  The dimension and number of the
output arrays are equal to the number of indexing dimensions.  If the step
length is not a complex number, then the stop is not inclusive.

However, if the step length is a **complex number** (e.g. 5j), then
the integer part of its magnitude is interpreted as specifying the
number of points to create between the start and stop values, where
the stop value **is inclusive**.

Returns
-------
mesh-grid
    `ndarrays` with only one dimension not equal to 1

See Also
-

## s.where

In [51]:
# Series.where keeps values where the condition is True and puts NaN
# elsewhere; the NaNs turn the integer Series into float64.
s = pd.Series(range(5))
s.where(s > 2)

0    NaN
1    NaN
2    NaN
3    3.0
4    4.0
dtype: float64

In [52]:
# mask is the inverse of where: values are replaced where the condition is True.
s.mask(s > 2)

0    0.0
1    1.0
2    2.0
3    NaN
4    NaN
dtype: float64

In [53]:
# Boolean indexing drops the non-matching rows instead of filling them,
# so the result keeps its int64 dtype.
s[s > 2]

3    3
4    4
dtype: int64

In [56]:
# t has only 3 entries while s has 5: used below to show what happens to
# index labels that are missing from the condition.
s = pd.Series(range(5))
t = pd.Series([True, False, True])

In [57]:
# Labels 3 and 4 are absent from t; where replaces them with 99, the same
# as the label where t is False.
s.where(t, 99)

0     0
1    99
2     2
3    99
4    99
dtype: int64

In [58]:
# With mask, the labels absent from t (3 and 4) are also replaced with 99,
# in addition to the labels where t is True.
s.mask(t, 99)

0    99
1     1
2    99
3    99
4    99
dtype: int64

In [59]:
# Rebuild the 0..4 integer Series for the replacement-value examples.
s = pd.Series(range(5))
s

0    0
1    1
2    2
3    3
4    4
dtype: int64

In [60]:
# With a replacement value (10) no NaN is introduced, so the dtype stays int64.
s.where(s > 2, 10)

0    10
1    10
2    10
3     3
4     4
dtype: int64

In [61]:
# mask with a replacement value: entries greater than 2 become 10.
s.mask(s > 2, 10)

0     0
1     1
2     2
3    10
4    10
dtype: int64

In [62]:
# 5x2 frame holding 0..9 row by row; reshape(-1, 2) infers the row count.
df = pd.DataFrame(np.arange(10).reshape(-1, 2), columns=['A', 'B'])
df

Unnamed: 0,A,B
0,0,1
1,2,3
2,4,5
3,6,7
4,8,9


In [35]:
# Boolean frame marking the entries divisible by 3 (0 counts: 0 % 3 == 0).
m = df % 3 == 0
m

Unnamed: 0,A,B
0,True,False
1,False,True
2,False,False
3,True,False
4,False,True


In [38]:
# DataFrame.where is element-wise: entries where m is False become NaN.
df.where(m)

Unnamed: 0,A,B
0,0.0,
1,,3.0
2,,
3,6.0,
4,,9.0


In [37]:
# Where m is False, take the corresponding entry from -df instead of NaN.
df.where(m, -df)

Unnamed: 0,A,B
0,0,-1
1,-2,3
2,-4,-5
3,6,-7
4,-8,9


In [42]:
# np.where with the branches swapped (-df where m is True, df otherwise);
# the result is a plain ndarray without index or column labels.
np.where(m, -df, df)

array([[ 0,  1],
       [ 2, -3],
       [ 4,  5],
       [-6,  7],
       [ 8, -9]])

In [43]:
# df.where(cond, other) matches np.where(cond, df, other) element by element.
df.where(m, -df) == np.where(m, df, -df)

Unnamed: 0,A,B
0,True,True
1,True,True
2,True,True
3,True,True
4,True,True


In [44]:
# where(m, other) gives the same result as mask(~m, other).
df.where(m, -df) == df.mask(~m, -df)

Unnamed: 0,A,B
0,True,True
1,True,True
2,True,True
3,True,True
4,True,True


## pandas `Series.where` / `DataFrame.where`

In [1]:
import pandas as pd

# Fresh 0..4 integer Series for the examples in this section.
s = pd.Series(range(5))

In [2]:
# Only the value 0 fails s > 0, so only the first entry becomes NaN.
s.where(s > 0)

0    NaN
1    1.0
2    2.0
3    3.0
4    4.0
dtype: float64

In [3]:
# mask replaces every entry where s > 0, leaving only the first value.
s.mask(s > 0)

0    0.0
1    NaN
2    NaN
3    NaN
4    NaN
dtype: float64

In [5]:
# t covers only labels 0 and 1; labels 2-4 are missing from the condition
# and are replaced with 99, just like the label where t is False.
s = pd.Series(range(5))
t = pd.Series([True, False])
s.where(t, 99)

0     0
1    99
2    99
3    99
4    99
dtype: int64

In [6]:
# Entries that are not greater than 1 are replaced with 10; dtype stays int64.
s.where(s > 1, 10)

0    10
1    10
2     2
3     3
4     4
dtype: int64

In [7]:
# The inverse operation: entries greater than 1 are replaced with 10.
s.mask(s > 1, 10)

0     0
1     1
2    10
3    10
4    10
dtype: int64

In [9]:
import numpy as np

# 5x2 frame holding 0..9 row by row; reshape(-1, 2) infers the row count.
df = pd.DataFrame(np.arange(10).reshape(-1, 2), columns=['A', 'B'])
df

Unnamed: 0,A,B
0,0,1
1,2,3
2,4,5
3,6,7
4,8,9


In [10]:
# m marks the entries divisible by 3; where keeps those and takes the
# corresponding entry of -df everywhere else.
m = df % 3 == 0
df.where(m, -df)

Unnamed: 0,A,B
0,0,-1
1,-2,3
2,-4,-5
3,6,-7
4,-8,9


In [11]:
# df.where(cond, other) matches np.where(cond, df, other) element by element.
df.where(m, -df) == np.where(m, df, -df)

Unnamed: 0,A,B
0,True,True
1,True,True
2,True,True
3,True,True
4,True,True


In [12]:
# where(m, other) gives the same result as mask(~m, other).
df.where(m, -df) == df.mask(~m, -df)

Unnamed: 0,A,B
0,True,True
1,True,True
2,True,True
3,True,True
4,True,True


In [72]:
import pandas as pd
import numpy as np

# Small sales table: one row per date with units sold and unit price.
dates = ["April-10", "April-11", "April-12", "April-13", "April-14", "April-16"]
sales = [200, 300, 400, 200, 300, 300]
prices = [3, 1, 2, 4, 3, 2]

df = pd.DataFrame(dict(Date=dates, Sales=sales, Price=prices))

# Condition-only np.where returns a one-element tuple of position arrays;
# unpacking it into a list therefore gives a list holding a single array,
# not a flat list of row numbers.
reqd_Index = [*np.where(df["Sales"].ge(300))]
print(reqd_Index)

[array([1, 2, 4, 5], dtype=int64)]


In [71]:
# Condition-only np.where on a Series: a one-element tuple containing the
# array of integer positions where Sales >= 300.
np.where(df["Sales"] >= 300)

(array([1, 2, 4, 5], dtype=int64),)

In [70]:
# Series.where keeps the original length: non-matching rows become NaN
# (and the column turns float64) instead of being dropped.
df["Sales"].where(df["Sales"] >= 300)

0      NaN
1    300.0
2    400.0
3      NaN
4    300.0
5    300.0
Name: Sales, dtype: float64

In [73]:
# np.where(condition) yields integer *positions*, so select rows with the
# positional indexer .iloc. The label-based .loc only produced the right rows
# because df has the default 0..n-1 index, where labels and positions coincide.
# [0] unpacks the one-element tuple that condition-only np.where returns.
df.iloc[np.where(df["Sales"] >= 300)[0]]

Unnamed: 0,Date,Sales,Price
1,April-11,300,1
2,April-12,400,2
4,April-14,300,3
5,April-16,300,2


In [74]:
# Passing the boolean mask straight to .loc selects the same rows without
# going through np.where at all.
df.loc[df["Sales"] >= 300]

Unnamed: 0,Date,Sales,Price
1,April-11,300,1
2,April-12,400,2
4,April-14,300,3
5,April-16,300,2
