In [1]:
import numpy as np
np.__version__
from sklearn.preprocessing import normalize


In [2]:
arr = np.array(range(10))
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [3]:
# create 3*3 array of all true
np.full((3, 3), True, dtype=bool)

array([[ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True]])

In [4]:
arr[arr%2 !=0]

array([1, 3, 5, 7, 9])

# where

In [5]:
arr2 = np.where(arr%2 !=0, -1, arr)
arr2

array([ 0, -1,  2, -1,  4, -1,  6, -1,  8, -1])

# reshape

In [6]:
arr2.reshape(2,-1)    # -1 automatically decides on the number of columns.

array([[ 0, -1,  2, -1,  4],
       [-1,  6, -1,  8, -1]])

# concat

In [7]:
arr2 = arr2.reshape(2,-1)
arr = arr.reshape(2,-1)
arr3 = np.concatenate([arr, arr2], axis=0)    # axis=0 --> run accross rows
arr3

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [ 0, -1,  2, -1,  4],
       [-1,  6, -1,  8, -1]])

In [8]:
arr3 = np.concatenate([arr, arr2], axis=1)    # axis=1 --> run accross columns
arr3

array([[ 0,  1,  2,  3,  4,  0, -1,  2, -1,  4],
       [ 5,  6,  7,  8,  9, -1,  6, -1,  8, -1]])

# intersection

In [9]:
a = np.array([1,2,3,2,3,4,3,4,5,6])
b = np.array([2,7,10,2,7,4,9,4,9,8])
np.intersect1d(a, b)

array([2, 4])

# diff (remove uncommon)

In [10]:
np.setdiff1d(a, b)

array([1, 3, 5, 6])

# position where match

In [11]:
np.where(b==a)

(array([3, 5, 7]),)

In [12]:
a[(a>5) & (a<11)]

array([6])

# Convert the function maxx that works on two scalars, to work on two arrays.

In [13]:
def maxx(x, y):
    """Get the maximum of two items"""
    if x >= y:
        return x
    else:
        return y
pair_max = np.vectorize(maxx, otypes=[float])
pair_max(a, b)

array([ 2.,  7., 10.,  2.,  7.,  4.,  9.,  4.,  9.,  8.])

In [14]:
arr = np.arange(9).reshape(3,3)
arr

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [15]:
arr[:, [0, 2, 1]]

array([[0, 2, 1],
       [3, 5, 4],
       [6, 8, 7]])

In [16]:
arr[[0, 2, 1], :]

array([[0, 1, 2],
       [6, 7, 8],
       [3, 4, 5]])

In [17]:
np.random.randint(low=5, high=10, size=(5, 3))

array([[7, 9, 5],
       [9, 8, 5],
       [5, 9, 8],
       [9, 8, 5],
       [7, 5, 9]])

# removing scientific notation

In [18]:
np.set_printoptions(suppress=True, precision=6)
rand_arr = np.random.random([3,3])/1e3
rand_arr

array([[0.000578, 0.000315, 0.00017 ],
       [0.000741, 0.000485, 0.000363],
       [0.000905, 0.000973, 0.000528]])

# limit on number of print args

In [19]:
np.set_printoptions(threshold=8)
np.arange(15)

array([ 0,  1,  2, ..., 12, 13, 14])

In [20]:
np.set_printoptions(threshold=np.nan)
a = np.arange(15)
a

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [21]:
a[1]

1

In [22]:
a = (1, 2, 3, 4)
type(a)

tuple

In [23]:
a = np.array(a).reshape(2,2)

In [24]:
# mean: sum(a)/n
np.mean(a)

2.5

In [25]:
# median: is the middle number of array  
np.median(a)

2.5

In [26]:
# The standard deviation is a summary measure of the differences of each observation from the mean.
# std = a-mean
np.std(a)

1.118033988749895

In [27]:
a = np.random.uniform(low=5, high=10, size=(5, 3))
a

array([[7.902713, 6.712149, 7.757428],
       [6.465373, 6.459064, 6.525677],
       [7.597196, 6.478671, 7.43516 ],
       [7.501775, 5.686723, 7.39259 ],
       [8.455867, 6.183797, 9.387486]])

# normalize 

In [28]:
norm2 = normalize(a, axis=0).ravel()
norm2

array([0.464264, 0.475425, 0.447265, 0.379824, 0.457499, 0.376247,
       0.446315, 0.458888, 0.428684, 0.44071 , 0.402794, 0.42623 ,
       0.49676 , 0.438002, 0.541248])

In [29]:
norm2[10] = np.nan
norm2

array([0.464264, 0.475425, 0.447265, 0.379824, 0.457499, 0.376247,
       0.446315, 0.458888, 0.428684, 0.44071 ,      nan, 0.42623 ,
       0.49676 , 0.438002, 0.541248])

In [30]:
np.where(np.isnan(norm2))

(array([10]),)

In [31]:
np.isnan(norm2).sum()

1

In [32]:
norm2

array([0.464264, 0.475425, 0.447265, 0.379824, 0.457499, 0.376247,
       0.446315, 0.458888, 0.428684, 0.44071 ,      nan, 0.42623 ,
       0.49676 , 0.438002, 0.541248])

In [33]:
norm3 = np.delete(norm2, np.where(np.isnan(norm2)), axis=0)
norm3

array([0.464264, 0.475425, 0.447265, 0.379824, 0.457499, 0.376247,
       0.446315, 0.458888, 0.428684, 0.44071 , 0.42623 , 0.49676 ,
       0.438002, 0.541248])

In [34]:
np.unique(norm2)

array([0.376247, 0.379824, 0.42623 , 0.428684, 0.438002, 0.44071 ,
       0.446315, 0.447265, 0.457499, 0.458888, 0.464264, 0.475425,
       0.49676 , 0.541248,      nan])

In [35]:
norm2 = norm2.reshape(norm2)

TypeError: 'numpy.float64' object cannot be interpreted as an integer

# getting second largest value 

In [None]:
norm3[::-1].sort()
print(norm3)
norm3[1]

In [None]:
vals, count = np.unique(norm3, return_counts=True)
vals, count

In [None]:
print(norm3[np.argmax(count)])

In [None]:
np.argwhere(norm3 > 0.1)[0]

In [None]:
np.where(norm3 > 0.5, 10, norm3)

In [None]:
norm3.argsort()

In [71]:
# one-hot encoding
np.random.seed(101) 
arr = np.random.randint(1,4, size=6)
uniq = np.unique(arr)
print(arr)
out = np.zeros((arr.shape[0], uniq.shape[0]))
for i, x in enumerate(arr):
    out[i][np.where(uniq == x)[0]] = 1
    
print(out)

[2 3 2 2 2 1]
[[0. 1. 0.]
 [0. 0. 1.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [1. 0. 0.]]


In [94]:
# remove nan
a = np.array([1,2,3,np.nan,5,6,7,np.nan])
x = a[~np.isnan(a)]
x

array([1., 2., 3., 5., 6., 7.])

In [None]:
# find nth iteration
x = np.array([1, 2, 1, 1, 3, 4, 3, 1, 1, 2, 1, 1, 2])
