# Numpy Reference Sheet

In [1]:
import numpy as np

np.info(np.array)       #info on functions

# Array Creation

In [None]:
a = np.array([1,2,3,4], dtype=float)   #create array and define data type(if needed)
a = np.array([1,2,3], ndmin=3)         #create 3d array (shape (1,1,3)) by prepending axes
a = np.arange(0,20,2)                  #create array from 0 to 20(not inclusive), increment of 2 each time
a = np.linspace(0,1,5)                 #create array from 0 to 1(inclusive), with 5 elements
a = np.logspace(0,100,60)              #create array of 60 points from 10**0 to 10**100, evenly spaced on a log scale
a = np.ones((3,4))              #create array with 3 rows, 4 cols filled with 1
a = np.zeros((3,4))             #create 3x4 array all filled with 0
a = np.empty((3,4))             #create 3x4 array without initialising it (avoid as will be filled with arbitrary elements)
a = np.full((3,4), 7)           #create 3x4 array all filled with 7
a = np.eye(4)                          #create 4x4 matrix filled with 1 along main diagonal, 0 elsewhere
a = np.identity(4)                     #same result as np.eye(4)
a = np.diagflat(a,1)                   #create array with a flattened to 1d to fill diag offset by 1 from main diag

np.random.seed(0)                   #set seed of the legacy global random generator so results are reproducible
a = np.random.random((3,4))         #create 3x4 matrix with random floats in [0,1)
a = np.random.normal(0,1,(3,4))     #create 3x4 matrix sampled from normal distribution with mean 0 and std dev 1
a = np.random.randint(0,20,(3,4))   #create 3x4 matrix with random ints from 0 to 19 (upper bound 20 is not inclusive)
a = np.random.permutation(10)       #create new array with numbers 0-9 shuffled (np.random.shuffle(a) instead shuffles a in place and returns None)
x = ['type1', 'type2', 'type3']     #categories to sample from
a = np.random.choice(x, 150, p=[0.5, 0.25, 0.25])  #probabilistic sampling to create 150 samples; p needs one probability per element of x and must sum to 1

a.copy()                  #makes a copy of array (a[:2].copy() makes a copy of subarray)

iterable = (x for x in range(10))             #generator yielding 0..9
from_gen = np.fromiter(iterable, dtype=int)   #create 1d array from generator; use builtin int because the np.int alias was removed in NumPy 1.24
print(from_gen)

#build a 2d array from a dict: one row per entry of t, columns taken from keys 'a','b','c','d' of each value
#NOTE(review): t is not defined in this cell - presumably a dict whose values are dicts with keys 'a'..'d'; confirm
a = np.array([[v[j] for j in ['a', 'b', 'c', 'd']] for k, v in t.items()])   #create array from dict

a.tolist()    #return array contents as a (nested) python list
a.astype(int) #return copy of array with elements cast to int

# Array Info

In [None]:
a.ndim                    #number of dimensions (axes) of array
a.shape                   #tuple with size of each dimension
a.dtype                   #datatype of elements in array
a.size                    #total number of elements (total bytes is a.nbytes, i.e. a.size * a.itemsize)
a.itemsize                #size of each element in bytes

# Changing Array Shape

In [None]:
a.reshape(4,-1)           #reshape to 4x? matrix (-1 lets numpy infer that dimension)
a.reshape(-1)             #flatten to 1d array, same as np.ravel(a) (a view where possible). Use a.flatten() to always get a copy
a.reshape(4,3,2)          #3d array with 4 arrays(axis=0), 3 rows(axis=1), 2 cols(axis=2)
a.reshape(3,2)            #2d array with 3 rows(axis=0), 2 cols(axis=1)
a[np.newaxis,:]           #change 1d(4,) to 2d row vector(1,4) 
a[:,np.newaxis]           #change 1d(4,) to 2d col vector(4,1), same as np.column_stack([a]) or a.reshape(-1,1)
a[np.newaxis]             #same as a[np.newaxis,:]: adds a new leading axis, so 1d(4,) becomes (1,4)

np.tile(a, (3, 1))            #repeat a 3 times by row; for 2d a same as np.concatenate([a]*3, axis=0), for 1d a same as np.vstack([a]*3)
np.tile(a, (3, 2))            #repeat a by 3 times by row and 2 times by col

np.repeat(x, 3, axis=1)   #repeat each element 3 times consecutively along each row (x must be at least 2d)

![image.png](attachment:image.png)

![image.png](attachment:image.png)


# Combining/Splitting Array and Inserting/Deleting Row/Col

In [None]:
#combining arrays
np.concatenate((a,b,c),axis=0)    #returns copy of appending b and c to a by adding new rows, same as np.vstack([a,b,c]) for 2d arrays
np.concatenate((a,b,c),axis=1)    #returns copy of appending b and c to a by adding new cols, same as np.hstack([a,b,c]) for 2d arrays
np.concatenate((a,b,c),axis=None) #returns copy with a, b and c flattened and joined into one 1d array, same as np.append(a,[b,c]) when b and c have the same shape
np.column_stack((a,b,c))          #turn 1d arrays into 2d columns first before doing hstack

#splitting arrays
x,y,z = np.split(a,3,axis=1)      #split matrix into 3 equal sections by cols !!sub-arrays are views, so in-place changes to them affect original matrix!!
x,y,z = np.split(a, [1,2])        #x:row0, y:row1, z:row2 to end !!sub-arrays are views, so in-place changes to them affect original matrix!!
np.hsplit(a,2)                    #split a into 2 equal sections by col, same as np.split(a,2,axis=1) for 2d a

#inserting row/col (each call returns a copy, a itself is unchanged)
np.insert(a, 1, [1,2,3])         #no axis given: a is flattened first, then 1,2,3 inserted before index 1
np.insert(a,1,[1,2,3],axis=0)    #insert 1,2,3 as a new row before row 1 (a must have 3 cols)
np.insert(a,1,[1,2,3],axis=1)    #insert 1,2,3 as a new col before col 1 (a must have 3 rows)

#deleting row/col
np.delete(a, [0,2], axis=0)    #return copy of matrix with rows 0 and 2 deleted

# Array Operations

In [None]:
#Arithmetic Operations
np.sum(a, axis=0)      #sum down the rows giving one total per column, same as np.add.reduce(a)
np.cumsum(a, axis=1)   #return copy of cumulative summation of elements across row, same as np.add.accumulate(a, axis=1)
np.subtract.accumulate(a)   #running subtraction along axis 0, keeping the intermediate results
np.diff(a)                  #difference between neighbouring elements along the last axis
np.prod(a,axis=1)      #return copy of elements multiplied across row, same as np.multiply.reduce(a, axis=1)
np.cumprod(a, axis=0)  #return copy of cumulative product of elements along col, same as np.multiply.accumulate(a)
np.divide(a, 5)        #true division, same as a / 5
np.floor_divide(a, 5)  #floor division, same as a // 5
np.mod(a, 5)               #same as np.remainder(a,5)
np.power(2,x,out=y[::2])   #compute 2**x for each element of x and write the results into every 2nd element of y

np.negative(a)            #flip sign of each element, same as -a
np.sign(a)                #obtain sign of each element (-1, 1 or 0)
np.round(a, decimals=1)   #round off to specified decimal
np.rint(a)                #round off to nearest integer
np.floor(a)               #round down to nearest integer
np.ceil(a)                #round up to nearest integer
np.trunc(a)               #drop the fractional part (round towards zero)
np.abs(a)                 #absolute value of each element

#Mathematical functions
np.sqrt(a)
np.reciprocal(a)        #reciprocal (1/x) for each element; use float arrays, as it is not designed for integer input
np.log(a)               #natural log (ln)
np.log10(a)             #log base 10
np.log2(a)              #log base 2
np.log1p(a)             #ln(1 + a), accurate even when a is very close to 0
np.cos(a)
np.sin(a)
np.exp(a)               #to compute e^a for each element

#Matrix functions
np.dot(a,b)        #matrix multiplication for 2d (same as a @ b), inner product for 1d (a1b1+a2b2), row*col
np.inner(a,b)      #sum-product over the last axis of a and b; for 2d this is row*row, i.e. a @ b.T
np.outer(a,b)      #outer product of 2 vectors (inputs are flattened), col*row
np.cross(a,b)      #cross product of 2 vectors
np.linalg.matrix_power(a,3)   #raise square matrix to power 3, i.e. a @ a @ a
np.linalg.det(a)              #to obtain determinant (must be non-zero for matrix to have inverse)
np.linalg.inv(a)              #inverse of matrix 

a.T                       #returns view of transposed matrix, same as np.transpose(a), np.swapaxes(a,0,1) for 2d
np.flip(a)                #returns view of array reversed along every axis, same as a[::-1,::-1] for 2d
np.fliplr(a)              #returns view of horizontal flip of matrix (col order reversed)
np.flipud(a)              #returns view of vertical flip of matrix (row order reversed)
np.rot90(a,2)             #returns view of matrix rotated 2x90deg CCW 
np.rot90(a,-3)            #returns view of matrix rotated 3x90deg CW (negative count rotates clockwise)

#to replace values using dictionary
my_dict = {1:23, 2:34, 3:36, 4:45}
np.vectorize(my_dict.get)(a)     #look up each element of a in my_dict; every element should be a key (my_dict.get returns None for a missing key)

#Polynomials (coefficients are listed from the highest power down to the constant term)
np.roots([1,-4,7])          #find roots of polynomial x^2-4x+7
np.polyval([1, -2, 1], 2)   #evaluate polynomial x^2-2x+1 when x=2
np.polyadd(a,b)             #add one poly to another (a and b are coefficient sequences)
np.polysub(a,b)             #subtract poly b from poly a
np.polymul(a,b)             #multiply poly a by poly b
np.polydiv(a,b)             #divide poly a by poly b, returns (quotient, remainder)

#Others
np.pad(a,3,'constant',constant_values=7)       #pad array with 7 and pad width 3 
np.array(np.meshgrid(x, y, z)).T.reshape(-1,3) #return all combinations with each element from each array (one combination per row)

np.multiply.outer([1, 2, 3], [4, 5, 6])        #every element of the 1st list multiplied by every element of the 2nd
# array([[ 4,  5,  6],
#        [ 8, 10, 12],
#        [12, 15, 18]])

eigval,eigvec=np.linalg.eig(m)                 #eigenvalues and eigenvectors of square matrix m; column eigvec[:,i] pairs with eigval[i]


# Sorting 

In [None]:
np.sort(a, axis=1)          #return copy of matrix with each row sorted 
np.argsort(a)               #return indices that would sort a (along the last axis)
np.argsort(np.argsort(a))   #return rank of each element (for 1d a)
np.partition(a,2,axis=1)    #return copy of matrix with index 2 item in correct place, smaller on left, bigger on right
np.argpartition(a,2)        #return indices to form partitioned array
a[np.argpartition(a,range(5))]      #to sort first 5 elements only (1d a): 5 smallest come first in sorted order, the rest are unordered

ind = np.lexsort((names, surnames))              #return indices to sort by surnames first, then by names (the last key passed is the primary key)
[surnames[i] + ", " + names[i] for i in ind]     #list "surname, name" strings in sorted order

data_type = [('name', 'S15'), ('class', int), ('height', float)]        #define field names and data types ('S15' = byte string of up to 15 chars)
students_details = [('James', 5, 48.5), ('Nail', 6, 52.5),('Paul', 5, 42.10), ('Pit', 5, 40.11)]
students = np.array(students_details, dtype=data_type)   #create structured np array with defined name and data type
np.sort(students, order=['class','height'])              #return sorted copy, ordered by class then height

# Searching

In [None]:
#Stats
np.max(a)            #find max element, same as np.amax(a)
np.argmax(a)         #find index of max element in the flattened array, unless axis is specified
np.min(a)            #find min element, same as np.amin(a)
np.argmin(a)         #find index of min element in the flattened array, unless axis is specified
np.mean(a)
np.average(a, weights=b)  #compute weighted average with b containing list of weights
np.std(a)            #population std dev (ddof=0) by default; pass ddof=1 for sample std dev
np.median(a)
np.var(a)            #population variance (ddof=0) by default; pass ddof=1 for sample variance
np.percentile(a, 50)  #return value at 50th percentile (median)

#Searching within 1 array
np.where(a<4)                    #return tuple of index arrays (one per dimension) for elements tt meet condition
np.where(a<4)[0]                 #row index of every element that meets condition (a row repeats if it has several matches)
np.where(a>4)[1]                 #col index of every element that meets condition (2d a; a col repeats if it has several matches)

np.count_nonzero(a == "Down")    #count non-zero values or those who meet condition
np.count_nonzero(a<4, axis=0)    #count those who meet condition in each col
np.count_nonzero(a%2==0, axis=1) #count those who meet condition in each row

np.partition(a,4)[:4]                #extract 4 smallest elements (1d a, not sorted among themselves), same as a[np.argpartition(a,4)[:4]]  
np.partition(a,a.shape[0]-5)[-5:]    #extract 5 largest elements (values, 1d a), same as a[np.argpartition(a,a.shape[0]-5)[-5:]]; the argpartition part alone gives their indices

val,idx=np.unique(a,return_index=True) #return sorted unique values and the index of their first occurrence
np.unique(a, return_counts=True)       #return sorted list of unique values in a, returning count of each, eg a=np.array([10,10,20,10,20,20,20,30,30,50,40,40])
val,count=np.unique(x, return_counts=True)  #count will contain freq of each number
np.digitize(a, bins=[10, 20])   #throw elements into 3 bins, bin0: <10, bin1: 10 <= value < 20, bin2: >=20
np.bincount(x).argmax()         #bincount to find freq of each number (x must hold non-negative ints). argmax to find the most frequent number, max to find highest freq

np.ravel_multi_index((1, 2), (3, 3))  #find index of (1,2) after flattening 3x3 matrix to 1d array
np.unravel_index([22, 41, 37], (7,6)) #find coordinates of index22,41,37 in 7x6 matrix (returns tuple of row indices, col indices)

x[np.abs(x-v).argmin()]         #to find element in x closest to v
a[np.triu_indices(3)]           #extract elements in the upper triangle (including main diagonal) of the leading 3x3 part of a

for i in np.nditer(a):           #to iterate thru each element regardless of array dimension
    print(i)                     #each i is a 0-d array holding one element

#Searching across 2 arrays    
np.setdiff1d(a,b)           #return sorted unique values in a that are not found in b
np.setxor1d(a,b)            #return sorted values that are in only one array
np.union1d(a,b)             #return all unique values in a and b, sorted


# Boolean Mask

In [None]:
a<4                         #elementwise check: boolean array marking which elements meet condition
np.where(a<4,-1,100)             #return copy of matrix where elements which meet condition become -1 and the rest become 100
np.where(a<4,a,100)              #return copy of matrix where elements which do not meet condition become 100 and the rest remain unchanged
np.where(a<4,a*2,a)              #return copy of matrix where only elements which meet condition are processed

np.any(a)                   #check if at least 1 non-zero in matrix
np.any(a<4, axis=0)         #check if at least 1 meet condition in each col
np.any(a%2==1, axis=1)      #check if at least 1 meet condition in each row

np.all(a)                        #check if all are non-zero (np.nonzero(a) is different: it returns the indices of the non-zero elements)
np.all((a<4)&(a!=2), axis=0)     #check if all meet both conditions in each col
np.all((a%2==1)|(a<4), axis=1)   #check if all meet at least one condition in each row

np.isnan(a)                 #check for missing values (NaN), elementwise
~np.isnan(a)                #check for non-missing values, elementwise
np.isfinite(a)              #elementwise check: True where value is finite (neither inf nor NaN); wrap in np.all(...) to test all values
np.isinf(a)                 #elementwise check: True where value is +inf or -inf
np.iscomplex(a)             #elementwise check: True where value has non-zero imaginary part; a.imag (attribute, no brackets) gives the imaginary parts
np.isreal(a)                #elementwise check: True where imaginary part is zero; a.real (attribute, no brackets) gives the real parts
np.isscalar(a)              #True only if a is a scalar such as 3 or 'x'; returns False for any array (use a.ndim == 1 to check for 1d)
np.isin(a,b)                #elementwise check if each element of a is in b

# Slicing Array
Basic slicing returns a view, so modifying the result also modifies the original array. Boolean-mask and fancy indexing return copies instead.

In [None]:
#index order templates (array, row, col are placeholders for an index or slice on each axis)
a[array,row,col]     #3d array, also a[axis0,axis1,axis2]
a[row,col]           #2d array, also a[axis0, axis1]
a[col]               #1d array, also a[axis0]

#row slicing (2d)
a[2]                 #slicing a single row (return 1d array)
a[2:-2:3]            #slicing rows 2 to 2nd last row(not inclusive) with step of 3
a[-1:-3:-1]          #slicing last row to 3rd last(not inclusive) [reversed order] 
a[::-1]              #reverse sequence of rows

#column/both slicing (2d)
a[:,3]               #slicing column 3 for all rows (return 1d array)
a[:,3:10:2]          #slicing columns 3 to 10(not inclusive) with step 2 for all rows
a[:,-2:-6:-1]        #slicing 2nd last col to 6th last col(not inclusive) for all rows [reversed order] 
a[::-1,::-1]         #reverse both rows and columns for array

#Using Boolean Mask (2d a); the axis argument is required so the mask has one entry per row/col
a[a<5]                      #return copy of elements meeting condition in 1d array
a[np.all(a>5, axis=1),:]    #return copy of rows where every element satisfies condition
a[:,np.any(a>5, axis=0)]    #return copy of cols where at least one element satisfies condition
a[~np.all(a>5, axis=1),:]   #return copy of rows where not every element satisfies condition
a[~np.isnan(a).any(axis=1)]       #remove rows which contain NaN

#using Fancy Indexing (returns a copy)
row = np.array([0,2])
col = np.array([1,3])
m[row,col]                        #index arrays are paired up: return 1d array of elements (0,1), (2,3)

row = np.array([0,2])                #row vector shape of 2,
col = np.array([1,3])[:,np.newaxis]  #column vector shape of 2,1
m[row,col]                           #index arrays broadcast to 2x2, picking the elements laid out below
#  row    col       (row,col) of each returned element
# 0 2   1 1    =   0,1  2,1
# 0 2   3 3    =   0,3  2,3
#

# Diagonal Operations

In [None]:
a[np.diag_indices(4)]     #slice main diagonal (of a 4x4 matrix)
a.diagonal(2)             #returns read-only view of diagonal with offset of 2 from main diagonal (use .copy() for a writable copy)
np.fliplr(a).diagonal(1)  #returns diag with offset 1 from minor diag
np.fill_diagonal(a, 4)    #fill main diagonal with 4, modifies a in place and returns None
np.fill_diagonal(a, [4*i for i in range(4)])  #fill diagonal using list comprehension
np.trace(a)               #sum of diagonal elements

print([a[len(a)-1-i][i] for i in range(len(a)-1,-1,-1)])  #print minor diagonal, from top-right to bottom-left

np.triu(a,-1)          #Return copy of a with elements below the k-th diagonal zeroed
np.tril(a,1)           #Return copy of a with elements above the k-th diagonal zeroed

# Normalising/Mean centering/Moving Summation or Average of Array

In [None]:
#min-max scaling: rescale every column of x to the range 0 to 1
x_min = np.min(x, axis=0)
x_max = np.max(x, axis=0)
x_range = x_max - x_min            #per-column spread; a constant column gives 0 here and so a division by zero below
x_normalized = (x - x_min) / x_range

#mean centering: shift every column so that its mean becomes 0
x_mean = np.mean(x, axis=0)
x_centered = x - x_mean

#moving summation/average
x = np.array([8, 8, 3, 7, 7, 0, 4, 2, 5, 2])
n = 3                     #window size (was never defined, so the lines below raised NameError)
y = np.cumsum(x)
y[n:] = y[n:] - y[:-n]    #y[i] now holds the sum of the n elements ending at index i (for i >= n-1)
moving_sum = y[n - 1:]    #moving summation for every n elements (first n-1 entries are incomplete windows, so skip them)
moving_avg = moving_sum / n   #moving average for every n elements
moving_avg

# Broadcasting

![image.png](attachment:image.png)

# Numpy Data Types

![image.png](attachment:image.png)
![image.png](attachment:image.png)

# Matrix Properties

<div>
<img src="attachment:image.png" width="500"/>
</div>

## Linear Algebra

![image.png](attachment:image.png)

## Adjacency/Connection/Vertex Matrix

![image.png](attachment:image.png)

Undirected Graph
- Entry is 0 where there is no edge (or no self-loop); add 1 for each edge between two vertices and 2 for each self-loop
- If simple graph: all 0 along diagonal
- Degree of a vertex found by summing values in either its respective row or column in the adjacency matrix.
- Will always be symmetric
- number of triangles in an undirected graph G is exactly the trace of A^3 divided by 6

Directed Graph
- Non-zero element A_ij indicates edge from i to j
- in-degree of vertex can be found by summing respective col
- out-degree of vertex can be found by summing respective row

When A^n, each element (i,j):
- gives the number of paths of steps n from vertex i to j
- the smallest n for which element (i,j) becomes non-zero is the minimum distance (number of steps) between i and j
- zero means not able to move from i to j for given steps n 
- np.dot(A,A) gives number of common neighbours between i and j

## Distance Matrix 

![image.png](attachment:image.png)

Distance Matrix (weighted adjacency matrix)
- entries on the main diagonal are all zero (that is, the matrix is a hollow matrix), i.e. xii = 0 for all 1 ≤ i ≤ N,
- all the off-diagonal entries are positive (xij > 0 if i ≠ j), (that is, a non-negative matrix),
- nodes that are not connected should be set to infinity
- the matrix is a symmetric matrix (xij = xji), and
- for any i and j, $x_{ij} \le x_{ik} + x_{kj}$ for all k (the triangle inequality). This can be stated in terms of tropical (min-plus) matrix multiplication
- Given two n x n matrices A and B, their distance product C is defined as the n x n matrix with $c_{ij} = \min_{k=1}^{n} (a_{ik} + b_{kj})$
- Under this distance product, $W^k$ gives the distances between vertices using paths of at most k edges, and $W^n$ is the distance matrix of the graph (compare with the Floyd-Warshall shortest-path algorithm).

## Incidence Matrix

![image.png](attachment:image.png)

![image.png](attachment:image.png)

Undirected Incidence Matrix
- Sum of each column is equal to 2 because each edge has a vertex connected to each end

Directed Incidence Matrix
- Bi,j = −1 if the edge ej leaves vertex vi, 1 if it enters vertex vi and 0 otherwise.

- If one row of A is deleted, the resulting (n − 1) x b matrix is called the reduced incidence matrix $A_1$.
- Given $A_1$, A is easily obtained by using the first property: each column of a directed incidence matrix holds exactly one −1 and one 1, so the deleted row is whatever makes every column sum to zero.
- If the reduced incidence matrix $A_1$ is known, the exact number of (spanning) trees that can be generated from the graph is given by $\det(A_1 A_1^T)$, where $A_1^T$ is the transpose of $A_1$.



![image.png](attachment:image.png)

# Applications

In [None]:
#index of local peaks: elements strictly larger than the neighbours on both sides
a = np.array([1, 3, 7, 1, 2, 6, 0, 1])
slope_sign = np.sign(np.diff(a))        #+1 where values rise, -1 where they fall, 0 where flat
doublediff = np.diff(slope_sign)        #-2 exactly where a rise is followed directly by a fall
peak_locations = np.flatnonzero(doublediff == -2) + 1   #+1 maps the double-diff position back to an index of a

#alternative: compare every interior element with both neighbours directly
a = np.array([1, 3, 7, 1, 2, 6, 0, 1])
before = a[:-2]
check = a[1:-1]
after = a[2:]
is_peak = (check > before) & (check > after)
pos = np.flatnonzero(is_peak) + 1       #+1 because check starts at index 1 of a

In [None]:
#flag duplicates: True for a repeated value, False for the 1st occurrence of each value
first_idx = np.unique(x, return_index=True)[1]   #index of the 1st occurrence of every distinct value
result = np.ones(x.shape[0], dtype=bool)         #start with every position flagged as a duplicate
result[first_idx] = False                        #1st occurrences are not duplicates
result

In [None]:
def one_hot_encodings(arr):
    """Return the one-hot encoding of a 1-D sequence of labels.

    Parameters
    ----------
    arr : 1-D array-like of labels (ints, strings, ...).

    Returns
    -------
    2-D float array of shape (len(arr), number of distinct labels). Row i has
    a single 1 in the column of arr[i]; columns follow the sorted order of
    the distinct labels, as returned by np.unique.
    """
    arr = np.asarray(arr)
    # return_inverse gives, for every element, the position of its label in
    # uniqs. Using that as the column index works for any label set, whereas
    # the previous `k - 1` indexing was only correct for labels exactly 1..K.
    uniqs, inverse = np.unique(arr, return_inverse=True)
    out = np.zeros((arr.shape[0], uniqs.shape[0]))
    out[np.arange(arr.shape[0]), inverse.ravel()] = 1
    return out

In [None]:
# Bin petal length (column 2 of iris): bin 1 = [0,3), bin 2 = [3,5), bin 3 = [5,10), bin 4 = 10 and above
petal_length = iris[:, 2].astype('float')
petal_length_bin = np.digitize(petal_length, [0, 3, 5, 10])

# Translate every bin number into its category label
label_map = {1: 'small', 2: 'medium', 3: 'large', 4: np.nan}
petal_length_cat = list(map(label_map.__getitem__, petal_length_bin))

In [None]:
def discretize(location, grid):
    """Map each coordinate of location to the index of the bin it falls in.

    location and grid are walked in parallel: coordinate i is binned with
    np.digitize against the bin edges in grid[i]. Returns a tuple of plain
    python ints, one per (coordinate, edges) pair.
    """
    bin_indices = []
    for coord, edges in zip(location, grid):
        bin_indices.append(int(np.digitize(coord, edges)))
    return tuple(bin_indices)

# grid - bin edges per dimension; a value lower than 1 lands in bin 0, a value of 4 or more lands in bin 4
grid = [np.array([1,2,3,4]), np.array([1,2,3,4])]

location = [2.5, 1.2]
print(discretize(location, grid))
# (2, 1)

In [None]:
def uniquerow(x):
    """Return the distinct rows of 2-D array x.

    Every row is reinterpreted as one opaque np.void item covering the row's
    bytes, so np.unique can compare whole rows at once. The result holds the
    first occurrence of each distinct row, in the order np.unique sorts the
    void items.
    """
    row_nbytes = x.dtype.itemsize * x.shape[1]
    # the view needs each row's bytes to be adjacent in memory, hence ascontiguousarray
    packed_rows = np.ascontiguousarray(x).view(np.dtype((np.void, row_nbytes)))
    first_idx = np.unique(packed_rows, return_index=True)[1]
    return x[first_idx]