# List and NumPy Array Manipulation examples

In [15]:
import numpy as np  # For NumPy versions

## Function that receives a list and returns another one removing duplicates

### Solutions with lists

In [16]:
def remove_duplicates_for(list_input):
    '''Return a new list with the first occurrence of every element, in original order.

    Elements are compared with ==, so unhashable items are supported and
    values such as True and 1 count as the same element.
    '''
    unique_items = []
    for candidate in list_input:
        if candidate in unique_items:
            continue  # Already kept an equal element earlier
        unique_items.append(candidate)
    return unique_items


def remove_duplicates_comprehension(list_input):
    '''Return a new list with the first occurrence of every element, using a list comprehension.

    The comprehension itself produces the result (no throwaway list of None values is built).
    Elements are compared with ==, so unhashable items are supported and
    values such as True and 1 count as the same element.
    '''
    seen = []
    # list.append returns None, so "not seen.append(elem)" is always True:
    # it only records elem as seen the first time it passes the membership test.
    return [elem for elem in list_input if elem not in seen and not seen.append(elem)]


def remove_duplicates_set(list_input):
    '''Remove duplicates by passing the elements through a set.

    A set cannot hold repeated elements, but it is unordered, so the
    original order of the elements is not preserved. Elements must be hashable.
    '''
    distinct = set(list_input)
    return list(distinct)


def remove_duplicates_enumerate(list_input):
    '''Keep each element only if it does not appear in the slice of the list that precedes it.

    Declarative version: enumerate supplies the position used to slice the prefix.
    '''
    return list(
        value
        for position, value in enumerate(list_input)
        if value not in list_input[:position]
    )


def remove_duplicates_for_enumerate(list_input):
    '''Imperative version of the prefix check: an element is kept only if the
    slice of the list before its position does not already contain it.'''
    first_occurrences = []
    for position, value in enumerate(list_input):
        if value in list_input[:position]:
            continue  # An equal element appeared earlier in the list
        first_occurrences.append(value)
    return first_occurrences

# Test: every (function, input) pair prints one de-duplicated list
dedupe_cases = [
    (remove_duplicates_for_enumerate, [1, 2, 3, 4, 5, 6, 7, 8, 9, 1]),
    (remove_duplicates_for_enumerate, [2, 3, 4, 5, 6, 7, 8, 9, 1, 1, 1]),
    (remove_duplicates_set, [1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 1]),
    (remove_duplicates_for_enumerate, [1, 2, 1, 1, 3, 4, 5, 6, 7, 8, 9, 1, 1, 2]),
    (remove_duplicates_for_enumerate, [True, 2, True, 3, 4, 5, 6, 7, 8, 9, True, 1, 1]),  # True == 1
    (remove_duplicates_for_enumerate, [1, 2, True, 3, 4, 5, 6, 7, 8, 9, True, 1, 1]),  # True == 1
    (remove_duplicates_for_enumerate, ['1', 2, '1', 3, 4, 5, 6, 7, 8, 9, '1', 1, 1]),
]
for dedupe, sample in dedupe_cases:
    print(dedupe(sample))

[1, 2, 3, 4, 5, 6, 7, 8, 9]
[2, 3, 4, 5, 6, 7, 8, 9, 1]
[1, 2, 3, 4, 5, 6, 7, 8, 9]
[1, 2, 3, 4, 5, 6, 7, 8, 9]
[True, 2, 3, 4, 5, 6, 7, 8, 9]
[1, 2, 3, 4, 5, 6, 7, 8, 9]
['1', 2, 3, 4, 5, 6, 7, 8, 9, 1]


### Solution with NumPy

In [17]:
def remove_duplicates_numpy(list_input):
    '''Return the distinct values of list_input as a plain Python list.

    np.unique returns the values sorted, so the original order is lost.
    The input is converted to an array first, so mixed types are cast to a
    common dtype (e.g. True becomes 1, numbers next to strings become strings).
    '''
    distinct_values = np.unique(list_input)
    return distinct_values.tolist()

# Test
numpy_dedupe_samples = [
    [1, 2, 3, 4, 5, 6, 7, 8, 9, 1],
    [2, 3, 4, 5, 6, 7, 8, 9, 1, 1, 1],  # NumPy returns sorted values, we lose original order
    [1, 2, 1, 1, 3, 4, 5, 6, 7, 8, 9, 1, 1, 2],
    [True, 2, True, 3, 4, 5, 6, 7, 8, 9, True, 1, 1],
    [1, 2, True, 3, 4, 5, 6, 7, 8, 9, True, 1, 1],
    ['1', 2, '1', 3, 4, 5, 6, 7, 8, 9, '1', 1, 1],
]
for sample in numpy_dedupe_samples:
    print(remove_duplicates_numpy(sample))


[1, 2, 3, 4, 5, 6, 7, 8, 9]
[1, 2, 3, 4, 5, 6, 7, 8, 9]
[1, 2, 3, 4, 5, 6, 7, 8, 9]
[1, 2, 3, 4, 5, 6, 7, 8, 9]
[1, 2, 3, 4, 5, 6, 7, 8, 9]
['1', '2', '3', '4', '5', '6', '7', '8', '9']


### Variant: removing duplicates from ndarrays

In [18]:
# np.unique already removes duplicates from ndarrays, so no new function is defined here.

# Test
for values in (
    [1, 2, 3, 4, 5, 6, 7, 8, 9, 1],
    [2, 3, 4, 5, 6, 7, 8, 9, 1, 1, 1],  # NumPy returns sorted values
    [1, 2, 1, 1, 3, 4, 5, 6, 7, 8, 9, 1, 1, 2],
):
    print(np.unique(np.array(values)))

# Here True is turned into 1 when the array is created, not when np.unique processes it
array_test_types = np.array([True, 2, True, 3, 4, 5, 6, 7, 8, 9, True, 1, 1])
print(array_test_types)
print(array_test_types.dtype)  # NumPy picks the type in which all elements "fit" (by casting)

for values in (
    [1, 2, True, 3, 4, 5, 6, 7, 8, 9, True, 1, 1],
    ['1', 2, '1', 3, 4, 5, 6, 7, 8, 9, '1', 1, 1],
):
    print(np.unique(np.array(values)))

[1 2 3 4 5 6 7 8 9]
[1 2 3 4 5 6 7 8 9]
[1 2 3 4 5 6 7 8 9]
[1 2 1 3 4 5 6 7 8 9 1 1 1]
int64
[1 2 3 4 5 6 7 8 9]
['1' '2' '3' '4' '5' '6' '7' '8' '9']


## Function that receives a list of lists and returns another one removing duplicates

In [19]:
def remove_duplicates_in_sublists(list_of_sublists):
    '''Remove elements that were already seen anywhere earlier in the list of lists.

    The sublist structure is kept: each input sublist yields one output sublist
    (possibly empty) holding only the elements seen for the first time.
    Elements must be hashable because a set tracks what has been seen.
    '''
    seen = set()
    result = []
    for sublist in list_of_sublists:
        kept = []
        for item in sublist:
            if item in seen:
                continue
            kept.append(item)
            seen.add(item)
        result.append(kept)
    return result

# Test
for nested_sample in (
    [[1, 2, 3], [1, 5, 3], [1, 2, 3]],
    [[1, 2, 3], [2, 3, 4], [4, 5, 6]],
):
    print(remove_duplicates_in_sublists(nested_sample))


[[1, 2, 3], [5], []]
[[1, 2, 3], [4], [5, 6]]


## Function that receives a NumPy matrix and returns another one replacing repeated values with NaN

In [20]:
def remove_duplicates_numpy_matrix(matrix: np.ndarray):
    '''Return a float copy of matrix in which repeated values are replaced with NaN.

    Elements are visited in row-major order: the first occurrence of each value
    is kept and every later occurrence becomes NaN. The input is not modified.

    Parameters:
        matrix: numeric array (or array-like) of any shape.

    Returns:
        Float ndarray with the same shape as the input.
    '''
    # NaN is a float value, so the data has to be converted to float.
    # astype always returns a new array, so writing into `flat` never touches the input.
    flat = np.asarray(matrix).astype(float).ravel()
    # return_index gives, for every distinct value, the position of its first occurrence
    _, first_positions = np.unique(flat, return_index=True)
    is_repeated = np.ones(flat.shape, dtype=bool)
    is_repeated[first_positions] = False
    flat[is_repeated] = np.nan
    return flat.reshape(np.shape(matrix))

array_t = np.array([[1, 2, 3]] * 3)  # Three identical rows
array_t_without_repeats = remove_duplicates_numpy_matrix(array_t)
print(array_t_without_repeats)
print(array_t)  # The original array is printed again to show it was left untouched

# Test
test_matrix = np.array([[1, 1, 3], [3, 5, 7], [8, 2, 3]])
print(remove_duplicates_numpy_matrix(test_matrix))

[[ 1.  2.  3.]
 [nan nan nan]
 [nan nan nan]]
[[1 2 3]
 [1 2 3]
 [1 2 3]]
[[ 1. nan  3.]
 [nan  5.  7.]
 [ 8.  2. nan]]


## Function that receives a list and returns a dictionary with the number of times each element appears.
Dictionary keys should be the list elements and values should be the number of times that element appears in the list

### Solution with lists

In [21]:
def count_elements(list_input):
    '''Count how many times each element appears in list_input.

    Returns a dictionary whose keys are the elements (in order of first
    appearance) and whose values are their number of occurrences.
    Keys that compare equal (e.g. True and 1) share a single entry.
    '''
    counts = {}
    for item in list_input:
        # dict.get supplies 0 the first time an element is seen
        counts[item] = counts.get(item, 0) + 1
    return counts

# Test
count_samples = [
    [1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 1],
    [1, 2, 1, 1, 3, 4, 5, 6, 7, 8, 9, 1, 1, 2],
    [True, 2, True, 3, 4, 5, 6, 7, 8, 9, True, 1, 1],  # True == 1
    ['1', 2, '1', 3, 4, 5, 6, 7, 8, 9, '1', 1, 1],
]
for sample in count_samples:
    print(count_elements(sample))


{1: 3, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: 1, 9: 1}
{1: 5, 2: 2, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: 1, 9: 1}
{True: 5, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: 1, 9: 1}
{'1': 3, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: 1, 9: 1, 1: 2}


### Solution with NumPy

In [22]:
def values_with_frequencies(vector):
    '''Map each distinct value of vector to its number of occurrences.

    np.unique with return_counts=True yields the sorted distinct values and
    their counts; both are converted to plain Python values for the dictionary.
    '''
    distinct, counts = np.unique(vector, return_counts=True)
    return {value: count for value, count in zip(distinct.tolist(), counts.tolist())}

# Test
frequency_samples = [
    [1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 1],
    [1, 2, 1, 1, 3, 4, 5, 6, 7, 8, 9, 1, 1, 2],
    [True, 2, True, 3, 4, 5, 6, 7, 8, 9, True, 1, 1],
    ['1', 2, '1', 3, 4, 5, 6, 7, 8, 9, '1', 1, 1],
]
for sample in frequency_samples:
    print(values_with_frequencies(sample))

{1: 3, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: 1, 9: 1}
{1: 5, 2: 2, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: 1, 9: 1}
{1: 5, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: 1, 9: 1}
{'1': 5, '2': 1, '3': 1, '4': 1, '5': 1, '6': 1, '7': 1, '8': 1, '9': 1}


In [23]:
def values_with_frequencies(array: np.ndarray):
    '''Map each distinct value of array to its number of occurrences.

    For every distinct value x, the boolean mask (array == x) is built and its
    True entries are counted.

    Parameters:
        array: ndarray or any array-like (e.g. a plain list).

    Returns:
        Dictionary {value: count} with plain Python values, ordered by sorted value.
    '''
    # The elementwise comparison below needs an ndarray: with a plain list,
    # `array == x` would be a single bool and every count would silently be 0.
    array = np.asarray(array)
    return {x: int(np.count_nonzero(array == x)) for x in np.unique(array).tolist()}
# The np.asarray conversion makes the elementwise equality array == x (an array of booleans) work for lists too

# Test
frequency_array_samples = [
    [1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 1],
    [1, 2, 1, 1, 3, 4, 5, 6, 7, 8, 9, 1, 1, 2],
    [True, 2, True, 3, 4, 5, 6, 7, 8, 9, True, 1, 1],
    ['1', 2, '1', 3, 4, 5, 6, 7, 8, 9, '1', 1, 1],
]
for sample in frequency_array_samples:
    print(values_with_frequencies(np.array(sample)))

{1: 3, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: 1, 9: 1}
{1: 5, 2: 2, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: 1, 9: 1}
{1: 5, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: 1, 9: 1}
{'1': 5, '2': 1, '3': 1, '4': 1, '5': 1, '6': 1, '7': 1, '8': 1, '9': 1}


## Function that receives two square matrices (NxN) and returns a third matrix containing the value 1 in positions where the values of A and B match and 0 otherwise.

### Solutions with lists

In [24]:
def equal_values_matrix(matrix1, matrix2):
    '''Compare two matrices (lists of rows) position by position.

    Returns a new matrix shaped like matrix1 holding 1 where the element of
    matrix1 equals the element of matrix2 at the same position, and 0 otherwise.
    '''
    result = []
    for row_index, row in enumerate(matrix1):
        flags = []
        for col_index, value in enumerate(row):
            flags.append(1 if value == matrix2[row_index][col_index] else 0)
        result.append(flags)
    return result

def equal_values_matrix_comprehension(matrix1, matrix2):
    '''Same comparison as the nested-loop version, written with nested list comprehensions:
    1 where both matrices hold equal values at the same position, 0 otherwise.'''
    return [
        [
            1 if value == matrix2[row_index][col_index] else 0
            for col_index, value in enumerate(row)
        ]
        for row_index, row in enumerate(matrix1)
    ]

# Tests
matrix1 = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]  # 3x3 Matrix
list_of_lists2 = [[1, 5, 6], [7, 5, 9], [1, 2, 9]]  # 3x3 Matrix, equal to matrix1 only on the diagonal
identity_3x3 = [[1, 0, 0], [0, 1, 0], [0, 0, 1]]

loops_result = equal_values_matrix(matrix1, list_of_lists2)
print(loops_result)  # Should display an identity matrix
assert loops_result == identity_3x3

assert equal_values_matrix_comprehension(matrix1, list_of_lists2) == identity_3x3

# TODO: verify that matrices are square and of the same size


[[1, 0, 0], [0, 1, 0], [0, 0, 1]]


### Solution with NumPy

In [25]:
def equal_values_matrix_numpy(matrix1: np.ndarray, matrix2: np.ndarray):
    '''Return an array holding 1 where matrix1 and matrix2 have equal elements and 0 elsewhere.

    The elementwise comparison produces a boolean mask and np.where maps
    True to 1 and False to 0.
    '''
    matches = matrix1 == matrix2
    return np.where(matches, 1, 0)

# Tests
matrix1 = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])  # 3x3 Matrix
list_of_lists2 = np.array([[1, 5, 6], [7, 5, 9], [1, 2, 9]])  # 3x3 Matrix
where_result = equal_values_matrix_numpy(matrix1, list_of_lists2)
print(where_result)  # Should display an identity matrix
assert np.array_equal(where_result, [[1, 0, 0], [0, 1, 0], [0, 0, 1]])

[[1 0 0]
 [0 1 0]
 [0 0 1]]


In [26]:
def equal_values_matrix_numpy(matrix1: np.ndarray, matrix2: np.ndarray):
    '''Return an integer array holding 1 where matrix1 and matrix2 have equal elements and 0 elsewhere.

    The boolean mask produced by the elementwise comparison is cast to int
    (True becomes 1, False becomes 0).
    '''
    is_equal = matrix1 == matrix2
    return is_equal.astype(int)

# Tests
matrix1 = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])  # 3x3 Matrix
list_of_lists2 = np.array([[1, 5, 6], [7, 5, 9], [1, 2, 9]])  # 3x3 Matrix
astype_result = equal_values_matrix_numpy(matrix1, list_of_lists2)
print(astype_result)  # Should display an identity matrix
assert np.array_equal(astype_result, [[1, 0, 0], [0, 1, 0], [0, 0, 1]])

[[1 0 0]
 [0 1 0]
 [0 0 1]]


## Function that receives a matrix and finds its saddle points (row maxima that are column minima or vice versa). Should return a list of tuples with the coordinates of the saddle points.

### Solutions with lists

In [27]:
def find_saddle_points(matrix, only_max_row=True):
    '''Find the saddle points of a matrix given as a list of rows.

    A position is always reported when its value is a maximum of its row and a
    minimum of its column.
    only_max_row=False: additionally reports positions whose value is a minimum
    of its row and a maximum of its column.

    Parameters:
        matrix: list of rows; rows are expected to be non-empty and of equal length.
        only_max_row: when True (default) only row maxima that are column minima are searched.

    Returns:
        List of (row_index, column_index) tuples without repetitions, sorted by
        row and then by column. An empty matrix gives an empty list.
    '''
    # Column extrema are computed once up front instead of once per candidate position
    col_minima = [min(column) for column in zip(*matrix)]
    col_maxima = [] if only_max_row else [max(column) for column in zip(*matrix)]

    # A set avoids reporting twice a position that satisfies both definitions
    saddle_points = set()

    for row_index, row in enumerate(matrix):  # For each row
        # Row maxima that are column minima
        max_row = max(row)
        for col_index, value in enumerate(row):
            if value == max_row and max_row == col_minima[col_index]:
                saddle_points.add((row_index, col_index))

        if not only_max_row:
            # Row minima that are column maxima
            min_row = min(row)
            for col_index, value in enumerate(row):
                if value == min_row and min_row == col_maxima[col_index]:
                    saddle_points.add((row_index, col_index))

    # Sets have no defined iteration order; sorting makes the result deterministic
    return sorted(saddle_points)
        
        
        
def test_find_saddle_points():
    '''Check find_saddle_points (list version) on small hand-verified matrices.

    Every result is sorted before comparing, so the checks do not depend on the
    order in which the saddle points are returned.
    '''
    matrix1 = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
    assert sorted(find_saddle_points(matrix1)) == [(0,2)], "Test 1a failed"
    assert sorted(find_saddle_points(matrix1, only_max_row=False)) == [(0,2), (2,0)], "Test 1b failed"

    matrix2 = [[1, 2, 3], [3, 2, 1], [2, 2, 2]]
    assert sorted(find_saddle_points(matrix2)) == [(2,1)], "Test 2a failed"
    # In this case the same coordinate is both row maximum and column minimum and vice versa, with the set it's only added once
    assert sorted(find_saddle_points(matrix2, only_max_row=False)) == [(2,1)], "Test 2b failed"

    # Test 3: empty matrix
    assert find_saddle_points([]) == [], "Test 3a failed"
    assert find_saddle_points([], only_max_row=False) == [], "Test 3b failed"

    # Test 4: all elements are equal (all are saddle points)
    matrix4 = [[5, 5, 5], [5, 5, 5]]
    assert sorted(find_saddle_points(matrix4)) == [(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2)], "Test 4a failed"
    assert sorted(find_saddle_points(matrix4, only_max_row=False)) == [(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2)], "Test 4b failed"

    matrix5 = [[1,2,3],[4,5,2],[3,4,5],[3,4,5],[4,5,5]]
    assert sorted(find_saddle_points(matrix5, only_max_row=False)) == [(4, 0)], "Test 5b failed"

    matrix6 = [[1,4,3,3,4],[2,5,4,4,5],[3,2,5,5,5]]
    assert sorted(find_saddle_points(matrix6, only_max_row=False)) == [(0, 4)], "Test 6b failed"

    matrix7 = [[1,2,6],[4,5,5],[3,4,5],[3,4,5],[4,5,5]]
    assert sorted(find_saddle_points(matrix7, only_max_row=False)) == [(1, 0), (1, 2), (2, 2), (3, 2), (4, 0), (4, 2)], "Test 7b failed"

    matrix8 = [[1,4,3,3,4],[2,5,4,4,5],[6,5,5,5,5]]
    assert sorted(find_saddle_points(matrix8, only_max_row=False)) == [(0, 1), (0, 4), (2, 1), (2, 2), (2, 3), (2, 4)], "Test 8b failed"

    print("Tests passed successfully")


test_find_saddle_points()

Tests passed successfully


Whether row minima that are column maxima are also reported is controlled by a separate option (`only_max_row`), because in most applications a saddle point is understood only as a row maximum that is also a column minimum. The other kind can also be found with the same algorithm by transposing the matrix.

This solution is designed to be easy to understand, but you can find other solutions here: 
https://exercism.org/tracks/python/exercises/saddle-points/solutions


### Solution with NumPy

In [28]:

def find_saddle_points(matrix: np.ndarray, only_max_row=True):
    '''Find the saddle points of a NumPy matrix using vectorized comparisons.

    A position is always reported when its value is a maximum of its row and a
    minimum of its column. With only_max_row=False, positions whose value is a
    minimum of its row and a maximum of its column are reported as well.

    Parameters:
        matrix: 2-D array (or array-like); 0-D and 1-D input is treated as a single row.
        only_max_row: when True (default) only row maxima that are column minima are searched.

    Returns:
        List of (row_index, column_index) tuples of Python ints without
        repetitions, sorted by row and then by column. Empty input gives [].

    Raises:
        ValueError: if the input has more than two dimensions.
    '''
    matrix = np.asarray(matrix)
    if matrix.size == 0:
        return []
    matrix = np.atleast_2d(matrix)
    if matrix.ndim != 2:
        raise ValueError("matrix must have at most 2 dimensions")

    # keepdims=True keeps the reduced axis so the extrema broadcast against the matrix
    is_row_max = matrix == matrix.max(axis=1, keepdims=True)
    is_col_min = matrix == matrix.min(axis=0, keepdims=True)
    is_saddle = is_row_max & is_col_min

    if not only_max_row:
        is_row_min = matrix == matrix.min(axis=1, keepdims=True)
        is_col_max = matrix == matrix.max(axis=0, keepdims=True)
        # OR-ing the masks reports a position satisfying both definitions only once
        is_saddle |= is_row_min & is_col_max

    # np.argwhere lists the True positions in row-major order, i.e. already sorted
    return [tuple(position) for position in np.argwhere(is_saddle).tolist()]

  
def test_find_saddle_points():
    '''Check find_saddle_points (NumPy version) on small hand-verified matrices.

    Every result is sorted before comparing, so the checks do not depend on the
    order in which the saddle points are returned.
    '''
    matrix1 = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    assert sorted(find_saddle_points(matrix1)) == [(0,2)], "Test 1a failed"
    assert sorted(find_saddle_points(matrix1, only_max_row=False)) == [(0,2), (2,0)], "Test 1b failed"

    matrix2 = np.array([[1, 2, 3], [3, 2, 1], [2, 2, 2]])
    assert sorted(find_saddle_points(matrix2)) == [(2,1)], "Test 2a failed"
    # In this case the same coordinate is both row maximum and column minimum and vice versa, with the set it's only added once
    assert sorted(find_saddle_points(matrix2, only_max_row=False)) == [(2,1)], "Test 2b failed"

    # Test 3: empty matrix
    assert find_saddle_points(np.array([])) == [], "Test 3a failed"
    assert find_saddle_points(np.array([]), only_max_row=False) == [], "Test 3b failed"

    # Test 4: all elements are equal (all are saddle points)
    matrix4 = np.array([[5, 5, 5], [5, 5, 5]])
    assert sorted(find_saddle_points(matrix4)) == [(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2)], "Test 4a failed"
    assert sorted(find_saddle_points(matrix4, only_max_row=False)) == [(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2)], "Test 4b failed"

    matrix5 = np.array([[1,2,3],[4,5,2],[3,4,5],[3,4,5],[4,5,5]])
    assert sorted(find_saddle_points(matrix5, only_max_row=False)) == [(4, 0)], "Test 5b failed"

    matrix6 = np.array([[1,4,3,3,4],[2,5,4,4,5],[3,2,5,5,5]])
    assert sorted(find_saddle_points(matrix6, only_max_row=False)) == [(0, 4)], "Test 6b failed"

    matrix7 = np.array([[1,2,6],[4,5,5],[3,4,5],[3,4,5],[4,5,5]])
    assert sorted(find_saddle_points(matrix7, only_max_row=False)) == [(1, 0), (1, 2), (2, 2), (3, 2), (4, 0), (4, 2)], "Test 7b failed"

    matrix8 = np.array([[1,4,3,3,4],[2,5,4,4,5],[6,5,5,5,5]])
    assert sorted(find_saddle_points(matrix8, only_max_row=False)) == [(0, 1), (0, 4), (2, 1), (2, 2), (2, 3), (2, 4)], "Test 8b failed"

    print("Tests passed successfully")

test_find_saddle_points()

Tests passed successfully
