## Data Structure Notes

In [38]:
# Data types
# Each line prints the source text of a literal/expression followed by the
# type Python reports for it.

# Numeric: int, float (with or without trailing digits) and complex.
print("\n Numeric")
print("1", type(1))
print("1.", type(1.))
print("1.0", type(1.0))  # evaluate the same literal the label shows
print("3j", type(3j))

# Boolean: bool(...) always yields a bool, whatever the argument.
print("\n Boolean")
print("bool(1)", type(bool(1)))
print("bool(0)", type(bool(0)))
print("False", type(False))

# Sequence: ordered, indexable containers (str, range, list, tuple).
print("\n Sequence")
print("'1'", type('1'))
print("'a'", type('a'))
print("range(3)", type(range(3)))
print("[1,2,3]", type([1, 2, 3]))
print("(1,2,3)", type((1, 2, 3)))

# Mapping: key -> value lookup.
print("\n Mapping")
print("{'a':1,'b':2,'c':3}", type({'a': 1, 'b': 2, 'c': 3}))

# Set: unique elements; frozenset is the immutable variant.
print("\n Set")
print("{1,2,3}", type({1, 2, 3}))
print("frozenset((1,2,3))", type(frozenset((1, 2, 3))))



 Numeric
1 <class 'int'>
1. <class 'float'>
1.0 <class 'float'>
3j <class 'complex'>

 Boolean
bool(1) <class 'bool'>
bool(0) <class 'bool'>
False <class 'bool'>

 Sequence
'1' <class 'str'>
'a' <class 'str'>
range(3) <class 'range'>
[1,2,3] <class 'list'>
(1,2,3) <class 'tuple'>

 Mapping
{'a':1,'b':2,'c':3} <class 'dict'>

 Set
{1,2,3} <class 'set'>
frozenset((1,2,3)) <class 'frozenset'>


#### Other details
Immutable objects:  String (Unicode text), Integer, Float, Range, Tuple, Frozenset  
Mutable objects:  List, Dictionary, Set

#### Membership
* in
* not in

#### Identity
* is
* is not

#### Logical Operators
* and
* or
* not

#### Comparison Operator
* See list online - there are many



#### Common Dictionary Methods
* The dictionary data type is mutable and dynamic.  

mydict.clear()
mydict.get(<key>)
mydict.items()
mydict.keys()
mydict.values()
mydict.pop(<key>)
mydict.popitem()
mydict.update(<obj>)

#### Common Sets Methods
* A set is an unordered collection of hashable objects. It is iterable, mutable, and has unique elements. The order of the elements is also not defined. While the addition and removal of items are allowed, the items themselves within the set must be immutable and hashable. Sets support membership testing operators (in, not in), and operations such as intersection, union, difference, and symmetric difference. Sets cannot contain duplicate items.  

set1.union(set2)  
set1.intersection(set2)  
set1.difference(set2)  
set1.symmetric_difference(set2)
set1.issubset(set2)


**Note:** Immutable sets are known as frozensets.  Except for this one difference, they are the same.  

Here is a good example that shows how sets can only hold immutable (hashable) objects.  A set of sets would not work because the individual sets are mutable, but a set of frozensets would work because frozensets are immutable.  


In [39]:
# Using just a set
# Each inner collection is a plain set, which is mutable and therefore unhashable.
a11 = set(['data'])
a21 = set(['structure'])
a31 = set(['python'])
# Set elements must be hashable, so building a set out of plain sets fails
# on this line with "TypeError: unhashable type: 'set'".
x1 = {a11, a21, a31}


TypeError: unhashable type: 'set'

In [40]:
# Using a frozenset
# frozensets are immutable and hashable, so they can be elements of a set.
a1, a2, a3 = (frozenset([word]) for word in ('data', 'structure', 'python'))
x = {a1, a2, a3}
print(x)


{frozenset({'python'}), frozenset({'data'}), frozenset({'structure'})}


#### Collections
The collections module provides different types of containers, which are objects that are used to store different objects and provide a way to access them.  

Access this module via `from collections import _____`  

Types:  
namedtuple  
deque  
defaultdict  
ChainMap  
Counter  
UserDict, UserList, UserString  

### Algorithm Development

Important Factors:
- Time Complexity
- Space Complexity (Memory Usage)
- Asymptotic Efficiency (Rate of Growth)
- Amortized Analysis

Types of Notation:
- $\Theta$ (Theta) - running time complexity with a tight bound (bounded both above and below).
- $O$ (Big O) - worst-case running time complexity with an upper bound
- $\Omega$ (Omega) - lower bound of an algorithm’s running time  

Use Big O Notations:
- O(1) : Constant
- O(logn)  :  Logarithmic
- O(n)   :  Linear
- O(nlogn)   :  Linear-logarithmic
- O(n^2)   :  Quadratic
- O(n^3)   :  Cubic
- O(2^n)    :  Exponential

Total Time Complexity
- Simplified notation


### Algorithms  
- Brute-force - try all possible solutions

#### Recursion
- A function calls itself on a smaller version of the problem, with each call building on the result of the previous one, and stops once a stopping (base) condition becomes true


In [39]:
# Not Recursive (iterative) - Factorial

def fact(n, tot=1):
    """Return ``tot * n!`` computed with a loop instead of recursion.

    Args:
        n: Non-negative integer whose factorial is wanted.
        tot: Running product to start from; the default of 1 yields plain n!.

    Returns:
        ``tot`` multiplied by every integer from ``n`` down to 1
        (``tot`` itself when ``n`` is 0).

    Raises:
        ValueError: If ``n`` is negative.
    """
    if n < 0:
        raise ValueError("n must be a non-negative integer")
    # Multiply downwards (n, n-1, ..., 1); a loop keeps the call depth constant,
    # so large n does not run into the interpreter's recursion limit.
    for factor in range(n, 0, -1):
        tot *= factor
    return tot


print(fact(4))

24


In [40]:
# Recursive - Factorial

def fact(n):
    """Return n! by multiplying n with the factorial of n - 1."""
    # Base case: 0! is 1, which ends the chain of recursive calls.
    return 1 if n == 0 else n * fact(n - 1)


print(fact(4))

24


#### Divide and conquer

In [38]:
# - binary search - O(logn)

def binary_search(arr, start, end, key):
    """Search the sorted sequence ``arr`` for ``key`` between two indices.

    Args:
        arr: Indexable sequence sorted in ascending order.
        start: First index of the range to search (inclusive).
        end: Last index of the range to search (inclusive).
        key: Value to look for.

    Returns:
        A tuple ``(index, loops)``: ``index`` is the position of ``key`` or
        ``None`` when it is not found, ``loops`` is the number of iterations
        the search needed.
    """
    loop = 0
    while start <= end:
        loop += 1
        # Floor division keeps the midpoint an exact integer; going through
        # float division loses precision (or overflows) for very large ranges.
        mid = start + (end - start) // 2
        if arr[mid] == key:
            return (mid, loop)
        elif arr[mid] < key:
            # key can only be in the upper half
            start = mid + 1
        else:
            # key can only be in the lower half
            end = mid - 1
    return (None, loop)


arr = [4, 6, 9, 13, 14, 18, 21, 24, 38]
x = 6
result = binary_search(arr, 0, len(arr)-1, x)
print(f'Binary Search: Index of {x}: {result[0]}.  Evaluated in {result[1]} loops')

In [38]:
# - merge sort - O(nlogn)

# merge
def merge(first_sublist, second_sublist):
    """Merge two ascending lists into one new ascending list.

    The merge is stable: when two elements compare equal, the one from
    ``first_sublist`` is placed first.

    Args:
        first_sublist: List already sorted in ascending order.
        second_sublist: List already sorted in ascending order.

    Returns:
        A new list holding every element of both inputs in ascending order.
    """
    i = j = 0
    merged_list = []
    while i < len(first_sublist) and j < len(second_sublist):
        # "<=" (not "<") so that ties are taken from the first list, which
        # preserves the original relative order of equal elements.
        if first_sublist[i] <= second_sublist[j]:
            merged_list.append(first_sublist[i])
            i += 1
        else:
            merged_list.append(second_sublist[j])
            j += 1
    # At most one of the inputs still has elements left; copy them over as-is.
    merged_list.extend(first_sublist[i:])
    merged_list.extend(second_sublist[j:])
    return merged_list

# merge sort
def merge_sort(unsorted_list):
    """Sort a list in ascending order with recursive merge sort.

    The list is split in half, each half is sorted recursively, and the two
    sorted halves are combined with the module-level ``merge`` function.

    Args:
        unsorted_list: List of mutually comparable elements.

    Returns:
        A list with the same elements in ascending order. A list of length
        0 or 1 is returned unchanged (the same object).
    """
    # stop trigger: a list of length 0 or 1 is already sorted.  Using "<= 1"
    # rather than "== 1" also ends the recursion for an empty list.
    if len(unsorted_list) <= 1:
        return unsorted_list

    # split list
    mid_point = len(unsorted_list) // 2
    first_half = unsorted_list[:mid_point]
    second_half = unsorted_list[mid_point:]

    # recursive function
    half_a = merge_sort(first_half)
    half_b = merge_sort(second_half)

    # combine the two sorted halves into one sorted list
    return merge(half_a, half_b)


print("Merge Sort of List: ", merge_sort([10,9,3,5,2,1]))

#### Other Algorithms
- quick sort
- algo for fast multiplication
- strassen's matrix multiplication
- closest pair of points

### Dynamic programming

In [42]:
# Fibonacci Sequence - Recursive
def fib(n):
    """Return the n-th term of the sequence 1, 1, 2, 3, 5, ... recursively.

    Every n <= 1 yields 1; larger n is the sum of the two preceding terms.
    Nothing is cached, so the same terms are recomputed many times.
    """
    return 1 if n <= 1 else fib(n - 1) + fib(n - 2)


for i in range(5):
    print(fib(i))

1
1
2
3
5


In [43]:
# Fibonacci Sequence - Dynamic
def dyna_fib(n):
    """Return the n-th Fibonacci number (F(0) = 0, F(1) = 1) using a lookup table.

    Computed values are stored in the module-level ``lookup`` list so that
    each term is calculated only once across calls.

    Args:
        n: Non-negative integer index into the sequence.

    Returns:
        The n-th Fibonacci number.

    Raises:
        ValueError: If ``n`` is negative.
    """
    if n < 0:
        raise ValueError("n must be a non-negative integer")
    if n == 0:
        return 0
    if n == 1:
        return 1
    # Grow the table on demand so n is not limited by its initial size.
    if n >= len(lookup):
        lookup.extend([None] * (n + 1 - len(lookup)))
    # stored calculation lookup
    if lookup[n] is None:
        # Fill the missing entries bottom-up; this avoids deep recursion and
        # reuses any entries already computed by earlier calls.
        older, newer = 0, 1
        for k in range(2, n + 1):
            if lookup[k] is None:
                lookup[k] = older + newer
            older, newer = newer, lookup[k]
    return lookup[n]


# Table of already computed terms, indexed by n; None means "not computed yet".
lookup = [None]*(1000)

for i in range(6):
    print(dyna_fib(i))

0
1
1
2
3
5


### Greedy algorithms

#### Shortest distance

In [2]:
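# Dijkstra's Algorithm -  O(|Edges| + |Vertices|log|Vertices|) when a Fibonacci-heap priority queue is used; this version finds the next node with a linear scan (get_next_node), which is slower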
def get_shortest_distance(table, vertex):
    """Return the distance currently recorded for ``vertex`` in ``table``.

    The value is read from the ``DISTANCE`` slot of the vertex's table row.
    """
    return table[vertex][DISTANCE]

def set_shortest_distance(table, vertex, new_distance):
    """Store ``new_distance`` in the ``DISTANCE`` slot of ``vertex``'s table row."""
    row = table[vertex]
    row[DISTANCE] = new_distance

def set_previous_node(table, vertex, previous_node):
    """Store ``previous_node`` in the ``PREVIOUS_NODE`` slot of ``vertex``'s table row."""
    row = table[vertex]
    row[PREVIOUS_NODE] = previous_node
    
def get_distance(graph, first_vertex, second_vertex):
    """Return the weight stored in ``graph`` for the edge first_vertex -> second_vertex."""
    neighbours = graph[first_vertex]
    return neighbours[second_vertex]

def get_next_node(table, visited_nodes):
    """Return the unvisited vertex with the smallest recorded distance.

    Candidates are the keys of ``table`` that are not in ``visited_nodes``.
    When several candidates share the smallest distance, the first one
    encountered is kept.
    """
    remaining = list(set(table.keys()).difference(set(visited_nodes)))
    # Start from the first candidate and replace it only by a strictly closer one.
    closest = remaining[0]
    smallest = table[closest][DISTANCE]
    for candidate in remaining:
        candidate_distance = table[candidate][DISTANCE]
        if candidate_distance < smallest:
            smallest = candidate_distance
            closest = candidate
    return closest


def find_shortest_path(graph, table, origin):
    """Fill ``table`` with the shortest known distance from ``origin`` to each vertex.

    Starting at ``origin``, the function repeatedly processes the closest
    not-yet-visited vertex (chosen by ``get_next_node``).  For each unvisited
    neighbour of that vertex it computes the distance of the route through
    the current vertex and, when that is shorter than the distance already
    in the table, records the new distance and the current vertex as the
    neighbour's previous node.

    Relies on the module-level names ``INFINITY`` and, through the helper
    functions, ``DISTANCE`` and ``PREVIOUS_NODE``; they must be defined
    before this function is called.

    Args:
        graph: Mapping where ``graph[u][v]`` is the weight of the edge u -> v.
        table: Mapping vertex -> row holding the distance and previous node;
            it is updated in place.
        origin: Vertex the distances are measured from.

    Returns:
        The same ``table`` object that was passed in, after updating it.
    """
    visited_nodes = []
    current_node = origin
    starting_node = origin
    while True:
        adjacent_nodes = graph[current_node]
        # Only neighbours that have not been visited yet are examined; when
        # every neighbour was already visited this set is empty and the loop
        # below does nothing.
        unvisited_nodes = set(adjacent_nodes).difference(visited_nodes)
        for vertex in unvisited_nodes:
            distance_from_starting_node = get_shortest_distance(table, vertex)
            if distance_from_starting_node == INFINITY and current_node == starting_node:
                # First hop out of the origin: the edge weight is the distance.
                # The edge is looked up as current_node -> vertex, the same
                # direction the other branch uses, so directed graphs work too.
                total_distance = get_distance(graph, current_node, vertex)
            else:
                total_distance = (get_shortest_distance(table, current_node)
                                  + get_distance(graph, current_node, vertex))
            if total_distance < distance_from_starting_node:
                set_shortest_distance(table, vertex, total_distance)
                set_previous_node(table, vertex, current_node)
        visited_nodes.append(current_node)
        # Stop once as many vertices were visited as the table has rows.
        if len(visited_nodes) == len(table.keys()):
            break
        current_node = get_next_node(table, visited_nodes)
    return table


# ------------------------------------------

# Weighted graph as an adjacency mapping: graph[u][v] is the weight of the
# edge between u and v.  Every edge is listed in both directions.
graph = {
    'A': {'B': 5, 'D': 9, 'E': 2},
    'B': {'A': 5, 'C': 2},
    'C': {'B': 2, 'D': 3},
    'D': {'A': 9, 'F': 2, 'C': 3},
    'E': {'A': 2, 'F': 3},
    'F': {'E': 3, 'D': 2},
}

# Positions inside each table row, and the marker for "no route found yet".
# The functions above read these names, so they must be defined before
# find_shortest_path is called.
DISTANCE = 0
PREVIOUS_NODE = 1
INFINITY = float('inf')

# One row per vertex: [shortest known distance from the origin, previous node].
# The origin 'A' starts at distance 0; every other vertex starts unreached.
table = {
    'A': [0, None],
    'B': [INFINITY, None],
    'C': [INFINITY, None],
    'D': [INFINITY, None],
    'E': [INFINITY, None],
    'F': [INFINITY, None],
}

shortest_distance_table = find_shortest_path(graph, table, 'A')
for k in sorted(shortest_distance_table):
    print("{} - {}".format(k, shortest_distance_table[k]))

A - [0, None]
B - [5, 'A']
C - [7, 'B']
D - [7, 'F']
E - [2, 'A']
F - [5, 'E']


#### Other Algorithms
-  Kruskal’s minimum spanning tree
-  Dijkstra’s shortest path problem
-  The Knapsack problem
-  Prim’s minimal spanning tree algorithm
-  The traveling salesperson problem