# Algorithms and Data Structures

https://www.youtube.com/watch?v=8hly31xKli0

## Linear search

In [1]:
def linear_search(list, target):
  for i in range(0,len(list)):
    if list[i] == target:
      return i
  return None

def verify(index):
    if index is not None:
        print(f"Target found at index {index}")
    else:
        print(f"Target not found in list")

numbers = [1,2,3,4,5,6,7,8]
target = 1
result = linear_search(numbers, target)
verify(result)

Target found at index 0


## Binary search

Returns an index

In [2]:
def binary_search(list, target):
    first = 0
    last = len(list) - 1
    
    while first <= last:
        midpoint = (first + last)//2
        if list[midpoint] == target:
            return midpoint
        elif list[midpoint] < target:
            first = midpoint + 1
        else:
            last = midpoint - 1

def verify(index):
    if index is not None:
        print(f"Target found at index {index}")
    else:
        print(f"Target not found in list")

numbers = [1,2,3,4,5,6,7,8]
target = 8
result = binary_search(numbers, target)
verify(result)

Target found at index 7


## Recursive binary search

Returns True or False

In [3]:
def recursive_binary_search(list, target):
    if len(list) == 0:
        return False
    else:
        midpoint = len(list)//2
        if list[midpoint] == target:
            return True
        else:
            if list[midpoint] < target:
                return recursive_binary_search(list[midpoint+1:], target)
            else:
                return recursive_binary_search(list[:midpoint], target)
            
def verify(result):
    print("Target found: ", result)
    
numbers = [1,2,3,4,5,6,7,8]
target = 5
result = recursive_binary_search(numbers, target)
verify(result)

Target found:  True


## Arrays

In Python creating a list allocates continuous memory for pointers
Accessing a value via subscript cal

### Search in Python

- `in` operator - calls contain method on list type
- `for` loop - iterates manually through the list - like linear_search

Consider the cost of sorting when choosing between linear and binary search

Insert and delete are linear time
Append is constant time

In [4]:
new_list = [1,2,3]
result = new_list[0]
if 1 in new_list: print(True)

for n in new_list:
    if n == 1:
        print(True)
        break

True
True


### Linked Lists

Appropriate if more inserts/deletes than appends

Linked lists contain a node reference to the following node (double-linked lists keep a reference to the parent as well).

The first item is the "Head", the last the "Tail"

Linked lists are self-referential

An object for storing a single node of a linked list.

Node two attributes - data and the link to the next node in the list
LinkedList creates the linked list

Three ways to add items (nodes) to the list
- Add at head (prepend)
- Add at tail (append)
Linked lists are efficient when adding at head or tail 
- True insert

In [53]:
class Node:
    data = None
    next_node = None
    
    def __init__(self, data):
        self.data = data
        
    def __repr__(self):
        return f"Node data: {self.data}"
    
class LinkedList:
    
    """
    Linear data structure that stores values in nodes. The list maintains a reference to the first node, also called head. Each node points to the next node in the list

    Attributes:
        head: The head node of the list
    """

    def __init__(self):
        self.head = None
        # Maintaining a count attribute allows for len() to be implemented in
        # constant time
        self.__count = 0

    def is_empty(self):
        """
        Determines if the linked list is empty
        Takes O(1) time
        """

        return self.head is None

    def __len__(self):
        """
        Returns the length of the linked list
        Takesn O(1) time
        """

        return self.__count

    def add(self, data):
        """
        Adds new Node containing data to head of the list
        Also called prepend
        Takes O(1) time
        """

        new_head = Node(data, next_node=self.head)
        self.head = new_head
        self.__count += 1

    def search(self, key):
        """
        Search for the first node containing data that matches the key
        Returns the node or `None` if not found
        Takes O(n) time
        """

        current = self.head

        while current:
            if current.data == key:
                return current
            else:
                current = current.next_node
        return None

    def insert(self, data, index):
        """
        Inserts a new Node containing data at index position
        Insertion takes O(1) time but finding node at insertion point takes
        O(n) time.
        Takes overall O(n) time.
        """

        if index >= self.__count:
            raise IndexError('index out of range')

        if index == 0:
            self.add(data)
            return
        if index > 0:
            new = Node(data)
            position = index
            current = self.head

            while position > 1:
                current = current.next_node
                position -= 1

            prev_node = current
            next_node = current.next_node

            prev_node.next_node = new
            new.next_node = next_node

        self.__count += 1

    def node_at_index(self, index):
        """
        Returns the Node at specified index
        Takes O(n) time
        """

        if index >= self.__count:
            raise IndexError('index out of range')

        if index == 0:
            return self.head

        current = self.head
        position = 0

        while position < index:
            current = current.next_node
            position += 1

        return current

    def remove(self, key):
        """
        Removes Node containing data that matches the key
        Returns the node or `None` if key doesn't exist
        Takes O(n) time
        """

        current = self.head
        previous = None
        found = False

        while current and not found:
            if current.data == key and current is self.head:
                found = True
                self.head = current.next_node
                self.__count -= 1
                return current
            elif current.data == key:
                found = True
                previous.next_node = current.next_node
                self.__count -= 1
                return current
            else:
                previous = current
                current = current.next_node

        return None

    def remove_at_index(self, index):
        """
        Removes Node at specified index
        Takes O(n) time
        """

        if index >= self.__count:
            raise IndexError('index out of range')

        current = self.head

        if index == 0:
            self.head = current.next_node
            self.__count -= 1
            return current

        position = index

        while position > 1:
            current = current.next_node
            position -= 1

        prev_node = current
        current = current.next_node
        next_node = current.next_node

        prev_node.next_node = next_node
        self.__count -= 1

        return current


    def __iter__(self):
        current = self.head

        while current:
            yield current
            current = current.next_node


    def __repr__(self):
        """
        Return a string representation of the list.
        Takes O(n) time.
        """
        nodes = []
        current = self.head
        while current:
            if current is self.head:
                nodes.append("[Head: %s]" % current.data)
            elif current.next is None:
                nodes.append("[Tail: %s]" % current.data)
            else:
                nodes.append("[%s]" % current.data)
            current = current.next_node
        return  '-> '.join(nodes)

In practice linked lists rarely need to be created

## Merge-sort algorithm

Splits array into single element arrays
It then sorts and creates 2 element arrays, which are further sorted/merged pair-wise

**Divide and conquer**

**Sorts a list in ascending order and returns sorted list**

- Divide: find midpoint and divide into sublist
- Conquer: Recursively sort the sublists created in previous step
- Combine: Merge the sorted sub-lists created in the previous step

In [54]:
# Let's define a recursive merge sort function. As usual, it takes the
# list or sub-list that we want it to sort.
def merge_sort(values):
  # Our base case is the same as with Quicksort. If the list has zero or one
  # values, there's nothing to sort, so we return it as-is.
  if len(values) <= 1:
    return values
  # If we didn't return, it means we're in the recursive case. So first we need
  # to split the list in half. We need to know the index we should split on,
  # so we get the length of the list and divide it by 2. So for example if
  # there are 8 items in the list, we'll want an index of 4. But what if there
  # were an odd number of items in the list, like 7? We can't have an index of
  # 3.5, so we'll need to round down in that case. Since we're working in
  # Python currently, we can take advantage of a special Python operator that
  # divides and rounds the result down: the floor division operator. It
  # consists of a double slash.
  middle_index = len(values) // 2
  # Now we'll use the Python slice syntax to get the left half of the list.
  # We'll pass that list to a recursive call to the merge_sort function.
  left_values = merge_sort(values[:middle_index])
  # We'll also use slice syntax to get the right half of the list, and pass
  # that to merge_sort as well.
  right_values = merge_sort(values[middle_index:])
  # Now we need to merge the two halves together, and sort them as we do it.
  # We'll create a list to hold the sorted values.
  sorted_values = []
  # Now we get to the complicated part - merging the two halves together and
  # sorting them as we do it.
  # We'll be moving from left to right through the left half of the list,
  # copying values over to the sorted_values list as we go. This left_index
  # variable will help us keep track of our position.
  left_index = 0
  # At the same time, we'll also be moving from left to right through the right
  # half of the list and copying values over, so we need a separate
  # right_index variable to track that position as well.
  right_index = 0
  # We'll keep looping until we've processed all of the values in both halves
  # of the list.
  while left_index < len(left_values) and right_index < len(right_values):
    # We're looking to copy over the lowest values first. So first we test
    # whether the current value on the left side is less than the value on the
    # right side.
    if left_values[left_index] < right_values[right_index]:
      # If the left side value is less, that's what we'll copy to the sorted
      # list.
      sorted_values.append(left_values[left_index])
      # And then we'll move to the next value in the left half of the list.
      left_index += 1
    # Otherwise, the current value from the right half must have been lower.
    else:
      # So we'll copy that value to the sorted list instead.
      sorted_values.append(right_values[right_index])
      # And then we'll move to the next value in the right half of the list.
      right_index += 1
  # At this point one of the two unsorted halves still has a value remaining,
  # and the other is empty. We won't waste time checking which is which.
  # We'll just copy the remainder of both lists over to the sorted list.
  # The one with a value left will add that value, and the empty one will add
  # nothing.
  sorted_values += left_values[left_index:]
  sorted_values += right_values[right_index:]
  # All the numbers from both halves should now be copied to the sorted list,
  # so we can return it.
  return sorted_values

## Merge sort on linked lists

Sorts a linked list in ascending order
- Recursively divide the linked list into sublists containing a single node
- Repeatedly merge the sublists to produce sorted sublists until one remains

Returns a sorted linked list

In [55]:
def merge_sort_ll(linked_list):
    """
    Sorts a linked list in ascending order
    - Recursively divide the linked list into sublists containing a single node
    - Repeatedly merge the sublists to produce sorted sublists until one remains

    Returns a sorted linked list

    Takes O(n log n) time
    Takes O(n) space
    """
    if linked_list.size() == 1:
        return linked_list
    elif linked_list.head is None:
        return linked_list

    left_half, right_half = split(linked_list)
    left = merge_sort_ll(left_half)
    right = merge_sort_ll(right_half)

    return merge(left, right)

def split_ll(linked_list):
    """
    Divide the unsorted list at midpoint into sublists
    Takes O(log n) time
    """
    if linked_list == None or linked_list.head == None:
        left_half = linked_list
        right_half = None

        return left_half, right_half
    else:
        size = linked_list.size()
        mid = size//2
        mid_node = linked_list.node_at_index(mid-1)

        left_half = linked_list
        right_half = LinkedList()
        right_half.head = mid_node.next_node
        mid_node.next_node = None

        return left_half, right_half


def merge_ll(left, right):
    """
    Merges two linked lists, sorting by data in nodes
    Returns a new merged list
    Takes O(n) space
    Runs in O(n) time
    """

    # Create a new linked list that contains nodes from merging left and right
    merged = LinkedList()
    # Add a fake head that is discarded later.
    merged.add(0)
    # Set current to the head of the linked list
    current = merged.head

    # Obtain head nodes for left and right linked lists
    left_head = left.head
    right_head = right.head

    # Iterate over left and right as long until the tail node of both
    # left and right
    while left_head or right_head:
        # If the head node of left is None, we're at the tail
        # Add the tail node from right to the merged linked list
        if left_head is None:
            current.next_node = right_head
            # Call next on right to set loop condition to False
            right_head = right_head.next_node 
        # If the head node of right is None, we're at the tail
        # Add the tail node from left to the merged linked list
        elif right_head is None:
            current.next_node = left_head
            # Call next on left to set loop condition to False
            left_head = left_head.next_node
        else:
            # Not at either tail node
            # Obtain node data to perform comparison operations
            left_data = left_head.data
            right_data = right_head.data

            # If data on left is lesser than right set current to left node
            # Move left head to next node
            if left_data < right_data:
                current.next_node = left_head
                left_head = left_head.next_node
            # If data on left is greater than right set current to right node
            # Move right head to next node
            else:
                current.next_node = right_head
                right_head = right_head.next_node

        # Move current to next node
        current = current.next_node

    # Discard fake head and set first merged node as head
    head = merged.head.next_node
    merged.head = head

    return merged
  

In [56]:
l = LinkedList()
l.add(10)
l.add(2)
l.add(44)
l.add(15)
l.add(200)
print(l)
sorted_linked_list = merge_sort_ll(l)

TypeError: Node.__init__() got an unexpected keyword argument 'next_node'

In [36]:
l.add(10)