In [45]:
# ruff: noqa: F401, E402, T201, T203, D103, B015, B018
from __future__ import annotations

import secrets
from typing import Any, Iterable, Iterator, Sequence

import matplotlib.pyplot as plt
import networkx as nx
import numba as nb
import numpy as np
import seaborn as sns
from IPython.core.interactiveshell import InteractiveShell
from numpy import dtype, ma, ndarray

# Echo the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"

## Basics

In [6]:
# Four ways to allocate a 3x5 array.
np.empty((3, 5))  # uninitialized memory: whatever values are displayed are arbitrary
np.full((3, 5), 42)  # every element set to the given fill value
np.zeros((3, 5))
np.ones((3, 5))

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

array([[42, 42, 42, 42, 42],
       [42, 42, 42, 42, 42],
       [42, 42, 42, 42, 42]])

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

## Check if vector is in the array

### Naive

In [22]:
# Determine whether a vector is contained in an ndarray.
a = np.arange(15).reshape((3, 5))
a
# `in` on an ndarray is element-wise: it is True as soon as ANY element compares
# equal after broadcasting, so it is not a whole-row membership test.
[0, 1, 2, 3, 4] in a  # Any
(25, 1, 2, 3, 4) in a  # Any
14 in a
25 in a
print("tolist")
# On the nested-list form, `in` compares whole rows with list equality;
# a tuple never equals a list, so the tuple lookup below is False.
[0, 1, 2, 3, 4] in a.tolist()
(0, 1, 2, 3, 4) in a.tolist()
[1, 1, 2, 3, 4] in a.tolist()

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

True

True

True

False

tolist


True

False

False

### Rabin-Karp

In [None]:
@nb.jit
def sum_hash(it: Iterable) -> int:
    """Return the sum of ``hash(x)`` over every element ``x`` of ``it``.

    The hash is purely additive, so it does not depend on element order and
    can be updated incrementally by adding and subtracting single elements.
    """
    total: int = 0
    for item in it:
        total += hash(item)
    return total

@nb.jit
def match(seq: ndarray, subseq: ndarray, start: int) -> bool:
    """Report whether the slice of ``seq`` beginning at ``start`` equals ``subseq``.

    The slice has the length of ``subseq`` (shorter if it runs past the end of
    ``seq``), and the comparison is delegated to ``np.array_equal``.
    """
    stop = start + len(subseq)
    window = seq[start:stop]
    return np.array_equal(window, subseq)

In [None]:
@nb.jit
def find_rk(seq: ndarray, subseq: ndarray) -> Iterator[int]:
    """Yield each start index at which ``subseq`` occurs in ``seq`` (Rabin-Karp style).

    A rolling hash of the current window of ``seq`` is compared with the hash
    of ``subseq``; only when the two hashes agree is the window compared
    element by element with ``match``. Overlapping occurrences are reported.

    Args:
        seq: Sequence to search; indexed element-wise, so pass a 1-D array.
        subseq: Pattern to look for.

    Yields:
        Start indices of the matches, in increasing order.
    """
    n: int = len(seq)
    m: int = len(subseq)
    # Hash of the first window and of the pattern (see ``sum_hash``).
    h1: int = sum_hash(seq[:m])
    h2: int = sum_hash(subseq)
    # Equal hashes are only a hint; ``match`` confirms the candidate.
    if h1 == h2 and match(seq, subseq, 0):
        yield 0
    for i in range(n - m):
        # Slide the window one step: add the entering element, drop the leaving one.
        h1 += hash(seq[i + m]) - hash(seq[i])
        if h1 == h2 and match(seq, subseq, i + 1):
            yield i + 1

In [124]:
# Flatten a 2-D array to 1-D, then list every start index where [2, 3] occurs.
seq: ndarray = np.array([[1, 2, 3, 4], [5, 2, 3, 1], [2, 3, 2, 3]]).flatten()
list(find_rk(seq, np.array([2, 3])))

[1, 5, 8, 10]

## NA Rows / Columns

In [26]:
# Remove rows or columns with invalid (nan, inf) values
a = np.array([[0, 1, 2, 3, 4], [5, 6, np.nan, 8, 9], [10, 11, 12, 13, 14]])
a
# Mask every invalid entry; the mask_* / compress_* helpers below work from this mask.
m = ma.masked_invalid(a)
m
# mask_* keeps the shape and returns a masked array with the affected
# rows/columns masked; compress_* drops them and returns a plain ndarray.
r = ma.mask_rows(m)
r
r = ma.compress_rows(m)
r
c = ma.mask_cols(m)
c
c = ma.compress_cols(m)
c
# Rows and columns together.
rc = ma.mask_rowcols(m)
rc
rc = ma.compress_rowcols(m)
rc

array([[ 0.,  1.,  2.,  3.,  4.],
       [ 5.,  6., nan,  8.,  9.],
       [10., 11., 12., 13., 14.]])

masked_array(
  data=[[0.0, 1.0, 2.0, 3.0, 4.0],
        [5.0, 6.0, --, 8.0, 9.0],
        [10.0, 11.0, 12.0, 13.0, 14.0]],
  mask=[[False, False, False, False, False],
        [False, False,  True, False, False],
        [False, False, False, False, False]],
  fill_value=1e+20)

masked_array(
  data=[[0.0, 1.0, 2.0, 3.0, 4.0],
        [--, --, --, --, --],
        [10.0, 11.0, 12.0, 13.0, 14.0]],
  mask=[[False, False, False, False, False],
        [ True,  True,  True,  True,  True],
        [False, False, False, False, False]],
  fill_value=1e+20)

array([[ 0.,  1.,  2.,  3.,  4.],
       [10., 11., 12., 13., 14.]])

masked_array(
  data=[[0.0, 1.0, --, 3.0, 4.0],
        [5.0, 6.0, --, 8.0, 9.0],
        [10.0, 11.0, --, 13.0, 14.0]],
  mask=[[False, False,  True, False, False],
        [False, False,  True, False, False],
        [False, False,  True, False, False]],
  fill_value=1e+20)

array([[ 0.,  1.,  3.,  4.],
       [ 5.,  6.,  8.,  9.],
       [10., 11., 13., 14.]])

masked_array(
  data=[[0.0, 1.0, --, 3.0, 4.0],
        [--, --, --, --, --],
        [10.0, 11.0, --, 13.0, 14.0]],
  mask=[[False, False,  True, False, False],
        [ True,  True,  True,  True,  True],
        [False, False,  True, False, False]],
  fill_value=1e+20)

array([[ 0.,  1.,  3.,  4.],
       [10., 11., 13., 14.]])

## Remove dimensions of size 1

In [122]:
# squeeze() drops every axis of length 1: shape (1, 3, 5) becomes (3, 5).
a = np.arange(15).reshape((1, 3, 5))
a
a.squeeze()

array([[[ 0,  1,  2,  3,  4],
        [ 5,  6,  7,  8,  9],
        [10, 11, 12, 13, 14]]])

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

## Most Frequent Value

In [133]:
# Most frequent value via a histogram of integer values.
a: ndarray = np.array([[1, 2, 2, 2, 4], [3, 4, 2, 5, 1], [5, 2, 7, 8, 9]])
a
# bincount counts occurrences of each value 0..max over the flattened array;
# it only accepts non-negative integers.
bc = np.bincount(a.flat)
bc
# Index of the largest count is the most frequent value (first one wins on ties).
bc.argmax()

array([[1, 2, 2, 2, 4],
       [3, 4, 2, 5, 1],
       [5, 2, 7, 8, 9]])

array([0, 2, 5, 1, 2, 2, 0, 1, 1, 1])

2

## Combine 1D and 2D array