In [1]:
import numpy as np
from numpy import random
import math

# Draw 20 random integers from [1, 15), then flip the sign of every value
# that lies strictly between 3 and 8.
array = random.randint(low=1, high=15, size=20)
array = np.where((array > 3) & (array < 8), -array, array)
array

array([-7, 12, 10,  2,  2,  9,  1, -4,  3,  2, -7, -6, 13, -4, 13, -6, 12,
       10, -6, -6])

In [2]:
# Replace every occurrence of the largest element with 0.
max_value = array.max()
array = np.where(array != max_value, array, 0)
array

array([-7, 12, 10,  2,  2,  9,  1, -4,  3,  2, -7, -6,  0, -4,  0, -6, 12,
       10, -6, -6])

In [3]:
def cartesian(arrays):
    """Return the Cartesian product of several 1-D sequences.

    Parameters
    ----------
    arrays : sequence of array_like
        The one-dimensional inputs to combine.

    Returns
    -------
    numpy.ndarray
        Array with one row per combination and one column per input; the
        last input varies fastest. The dtype is the common result type of
        the inputs, so non-integer values are preserved instead of being
        truncated to integers.
    """
    arrays = [np.asarray(a) for a in arrays]
    shape = tuple(len(a) for a in arrays)

    # Every combination of positions, one column per input array.
    ix = np.indices(shape, dtype=int)
    ix = ix.reshape(len(arrays), -1).T

    # Gather values into a separate array: writing them back into the integer
    # index array would silently cast floats to int.
    out = np.empty(ix.shape, dtype=np.result_type(*arrays))
    for n, arr in enumerate(arrays):
        out[:, n] = arr[ix[:, n]]

    return out

print(cartesian(([1, 2], [4, 5])))

[[1 4]
 [1 5]
 [2 4]
 [2 5]]


In [4]:
first_array = np.random.randint(0, 5, (8, 3))
second_array = np.random.randint(0, 5, (2, 2))

# Compare every element of first_array with every element of second_array;
# the comparison result has shape (8, 3, 2, 2).
result = np.equal(first_array[:, :, np.newaxis, np.newaxis], second_array)
# Keep the rows of first_array whose total number of matches reaches the
# number of columns of second_array.
# NOTE(review): this counts matches in total, so a row can qualify without
# matching an element from each row of second_array - confirm this is intended.
rows = np.nonzero(result.sum(axis=(1, 2, 3)) >= second_array.shape[1])[0]
print(rows)

[0 1 2 3 4 5 6]


In [5]:
Z = np.random.randint(0, 5, (10, 3))
# E flags the rows whose entries are all equal (each element equals its left neighbour).
E = (Z[:, 1:] == Z[:, :-1]).all(axis=1)
# U keeps only the rows that contain at least two different values.
U = Z[~E]
print(Z)
print(U)

[[4 1 2]
 [3 1 1]
 [0 2 2]
 [1 3 2]
 [0 0 2]
 [0 3 3]
 [0 3 3]
 [3 3 3]
 [3 1 1]
 [4 0 1]]
[[4 1 2]
 [3 1 1]
 [0 2 2]
 [1 3 2]
 [0 0 2]
 [0 3 3]
 [0 3 3]
 [3 1 1]
 [4 0 1]]


In [6]:
# Extract the distinct rows of a random 0/1 matrix.
Z = np.random.randint(0, 2, (6,3))
# Reinterpret each row as one opaque void item spanning itemsize * n_columns
# bytes, so that np.unique compares whole rows rather than single elements.
T = np.ascontiguousarray(Z).view(np.dtype((np.void, Z.dtype.itemsize * Z.shape[1])))
# idx holds, for every distinct row, the index of an occurrence in the original matrix.
_, idx = np.unique(T, return_index=True)
uZ = Z[idx]
print(uZ)

[[0 0 0]
 [0 0 1]
 [1 0 0]
 [1 1 0]]


In [7]:
# Non-vectorized functions

def prod_non_zero_diag_non_vect(x):
    """Multiply together the non-zero entries on the main diagonal of x.

    x is indexed as x[i][i], so it may be a nested list or a 2-D array and
    does not have to be square. Returns 1 when every diagonal entry is zero.
    """
    product = 1
    for pos in range(min(len(x), len(x[0]))):
        entry = x[pos][pos]
        if entry == 0:
            continue
        product *= entry
    return product
X = np.array([[1, 0, 1], [2, 0, 2], [3, 0, 3], [4, 4, 4]])
prod_non_zero_diag_non_vect(X)

3

In [8]:
def are_multiset_equal_non_vect(x, y):
    """Return True when x and y contain the same elements with the same counts.

    The inputs are compared through sorted copies, so neither x nor y is
    modified (sorting them in place would reorder the caller's data).
    """
    if len(x) != len(y):
        return False
    for left, right in zip(sorted(x), sorted(y)):
        if left != right:
            return False
    return True
X = np.array([1, 2, 2, 4])
Y = np.array([4, 2, 1, 2])
are_multiset_equal_non_vect(X, Y)

True

In [9]:
def max_after_zero_non_vect(x):
    """Return the largest element of x that directly follows a zero.

    Raises ValueError (from max) when no element is preceded by a zero.
    """
    followers = [x[pos] for pos in range(1, len(x)) if not x[pos - 1]]
    return max(followers)
X = np.array([6, 2, 0, 3, 0, 0, 5, 7, 0])
max_after_zero_non_vect(X)

5

In [10]:
def run_length_encoding_non_vect(x):
    """Run-length encode the sequence x.

    Returns a pair (values, lengths): values holds the first element of each
    run of equal consecutive elements and lengths the size of each run.
    An empty input yields ([], []).
    """
    # Without this guard x[0] below would raise IndexError on empty input.
    if len(x) == 0:
        return [], []
    val_list = [x[0]]
    num_list = []
    counter = 1
    for i in range(1, len(x)):
        if x[i - 1] == x[i]:
            counter += 1
        else:
            # A new run starts here: record its value and close the previous run.
            val_list.append(x[i])
            num_list.append(counter)
            counter = 1
    # Close the final run.
    num_list.append(counter)
    return val_list, num_list
X = np.array([2, 2, 2, 3, 3, 3, 5])
run_length_encoding_non_vect(X)

([2, 3, 5], [3, 3, 1])

In [11]:
def pairwise_distance_non_vect(x, y):
    """Return the Euclidean distances between every row of x and every row of y.

    The result is a nested list in which entry [i][j] is the distance between
    x[i] and y[j]; the number of coordinates used is the length of x[0].
    """
    return [
        [
            math.sqrt(sum((x_row[k] - y_row[k]) ** 2 for k in range(len(x[0]))))
            for y_row in y
        ]
        for x_row in x
    ]



In [12]:
# Vectorized functions

def prod_non_zero_diag_vect(x):
    """Multiply together the non-zero entries on the main diagonal of x.

    x is a 2-D array (not necessarily square). The product of an empty
    selection is 1, so an all-zero diagonal yields 1.
    """
    # Use the argument x, not the notebook-level variable X.
    diag = np.diag(x)
    return diag[diag != 0].prod()
X = np.array([[1, 0, 1], [2, 0, 2], [3, 0, 3], [4, 4, 4]])
prod_non_zero_diag_vect(X)


3

In [13]:
def are_multiset_equal_vect(x, y):
    """Return True when x and y contain the same elements with the same counts.

    Both inputs are reduced to their sorted distinct values and the number of
    times each value occurs; the multisets are equal when both parts agree.
    Neither input is modified.
    """
    x_nums, x_counts = np.unique(x, return_counts=True)
    y_nums, y_counts = np.unique(y, return_counts=True)
    # np.array_equal is False for arrays of different shapes, which covers
    # the case of a different number of distinct values.
    return bool(np.array_equal(x_nums, y_nums) and np.array_equal(x_counts, y_counts))
X = np.array([1, 2, 2, 4])
Y = np.array([4, 2, 1, 2])
are_multiset_equal_vect(X, Y)

True

In [14]:
def max_after_zero_vect(x):
    """Return the largest element of x that directly follows a zero.

    x is a 1-D array. Raises ValueError (from np.max) when no element is
    preceded by a zero.
    """
    x = np.asarray(x)
    # x[:-1] == 0 marks positions whose successor qualifies; x[1:] holds the
    # successors, so the two slices line up element by element.
    return np.max(x[1:][x[:-1] == 0])
X = np.array([6, 2, 0, 3, 0, 0, 5, 7, 0])
max_after_zero_vect(X)


5

In [15]:
def run_length_encoding_vect(x):
    """Run-length encode the 1-D array x.

    Returns a pair (values, lengths) of arrays: values holds the first element
    of each run of equal consecutive elements and lengths the size of each
    run. An empty input yields two empty arrays.
    """
    x = np.asarray(x)
    if x.size == 0:
        return x, np.zeros(0, dtype=int)
    # A run starts at index 0 and wherever an element differs from its
    # predecessor. Neighbours are compared directly in the array's own dtype
    # (no float sentinel), so large integers are compared exactly.
    first_positions = np.ones(x.size, dtype=bool)
    first_positions[1:] = x[1:] != x[:-1]
    indexes_1 = np.nonzero(first_positions)[0]
    # Each run ends where the next one starts; the last run ends at len(x).
    indexes_2 = np.hstack((indexes_1[1:], [x.size]))
    return x[first_positions], indexes_2 - indexes_1
X = np.array([2, 2, 2, 3, 3, 3, 5])
# Convert to plain lists so the result displays like the non-vectorized version.
tuple(part.tolist() for part in run_length_encoding_vect(X))

([2, 3, 5], [3, 3, 1])

In [16]:
def pairwise_distance_vect(x, y):
    """Return the Euclidean distances between every row of x and every row of y.

    x and y are 2-D arrays with the same number of columns; entry [i, j] of
    the result is the distance between x[i] and y[j].
    """
    x = np.asarray(x)
    y = np.asarray(y)
    # Use the argument y, not the notebook-level variable Y. The extra axis
    # makes the subtraction broadcast every row of x against every row of y.
    return np.sqrt(np.sum((x[:, np.newaxis] - y) ** 2, axis=-1))

In [17]:
# CrunchMunchies

# Load the comma-separated calorie values from the remote CSV file.
calorie_stats = np.loadtxt(
    "https://raw.githubusercontent.com/andrewD46/pandas_numpy_tasks/main/data/cereal.csv",
    delimiter=",",
)
calorie_stats

array([ 70., 120.,  70.,  50., 110., 110., 110., 130.,  90.,  90., 120.,
       110., 120., 110., 110., 110., 100., 110., 110., 110., 100., 110.,
       100., 100., 110., 110., 100., 120., 120., 110., 100., 110., 100.,
       110., 120., 120., 110., 110., 110., 140., 110., 100., 110., 100.,
       150., 150., 160., 100., 120., 140.,  90., 130., 120., 100.,  50.,
        50., 100., 100., 120., 100.,  90., 110., 110.,  80.,  90.,  90.,
       110., 110.,  90., 110., 140., 100., 110., 110., 100., 100., 110.])

In [18]:
# Despite its name, this holds the mean calorie value minus a baseline of 60.
# NOTE(review): 60 is presumably the calorie count being compared against - confirm.
average_calories = np.mean(calorie_stats) - 60
average_calories

46.883116883116884

In [19]:
# Sort a copy in ascending order; calorie_stats itself keeps its original order.
calorie_stats_sorted = calorie_stats.copy()
calorie_stats_sorted.sort()
calorie_stats_sorted

array([ 50.,  50.,  50.,  70.,  70.,  80.,  90.,  90.,  90.,  90.,  90.,
        90.,  90., 100., 100., 100., 100., 100., 100., 100., 100., 100.,
       100., 100., 100., 100., 100., 100., 100., 100., 110., 110., 110.,
       110., 110., 110., 110., 110., 110., 110., 110., 110., 110., 110.,
       110., 110., 110., 110., 110., 110., 110., 110., 110., 110., 110.,
       110., 110., 110., 110., 120., 120., 120., 120., 120., 120., 120.,
       120., 120., 120., 130., 130., 140., 140., 140., 150., 150., 160.])

In [20]:
# Median of the calorie values (np.median does not require the input to be sorted).
median_calories = np.median(calorie_stats_sorted)
median_calories

110.0

In [21]:
# Smallest of the 0th..99th percentiles of the calorie values, computed in one call.
# NOTE(review): the minimum over these percentiles is simply the 0th percentile,
# i.e. the smallest value - confirm whether the intent was to find the first
# percentile that exceeds some calorie threshold instead.
nth_percentile = np.percentile(calorie_stats_sorted, np.arange(100)).min()
nth_percentile

50.0

In [22]:
# Percentage of entries with more than 60 calories, formatted with a trailing '%'.
more_calories = f'{int((calorie_stats_sorted > 60).sum()) / len(calorie_stats_sorted) * 100}%'
more_calories

'96.1038961038961%'

In [23]:
# Population standard deviation (NumPy's default, ddof=0) of the calorie values.
calorie_std = calorie_stats.std()
calorie_std

19.35718533390827