# DSCI 6003 Skills Test 1

1. Diagram the solution correctly
2. Write pseudocode
3. Write actual code
4. Test it

Partial solutions will be accepted. Plagiarism will result in serious consequences.

1) 

Write an algorithm using a completed Union-Find class to determine if a graph has a **cycle** in it. 

Consider the graph below. If all three nodes are connected to one another, the graph has a cycle.

        0
        |  \
        |    \
        1-----2

The following UF array represents this closed cycle graph:

    index: 0   1   2

    value: 1   2   2

Your whole program should build several types of graphs, including those with cycles, and test for each.

In [65]:
from collections import defaultdict


class UF:
    """Union-find (disjoint-set) structure that also remembers its edges.

    Sets are merged with union by rank, and ``find`` shortens the paths it
    walks.  Items may be integers in ``range(N)`` or any other hashable
    symbol (e.g. strings); every distinct item occupies one of the ``N``
    slots.  Each ``union(p, q)`` call is additionally recorded as an
    undirected edge so that ``find_cycles`` can inspect the resulting graph.
    """

    def __init__(self, N):
        """Initialize an empty union find object with N items.

        Args:
            N: Number of items in the union find object.
        """

        self._id = list(range(N))     # parent slot of every slot
        self._count = N               # number of disjoint sets
        self._rank = [0] * N          # upper bound on each tree's height
        self._N = N
        self._symbol_to_index = {}    # item -> slot
        self._index_to_symbol = {}    # slot -> item
        self._next_free = 0           # lowest slot that may still be free
        self._edges = {}              # item -> list of items it was united with

    def _index_of(self, p):
        """Return the slot of item p, assigning a free slot on first use.

        Raises:
            IndexError: If p is new and every slot is already taken.
        """

        index = self._symbol_to_index.get(p)
        if index is not None:
            return index

        # For integer items, try to preserve natural 0--N order if
        # possible, even if the successive calls to find are not in
        # that order
        if isinstance(p, int) and 0 <= p < self._N and \
           p not in self._index_to_symbol:
            index = p
        else:
            # Any other item (e.g. string) takes the lowest slot that is
            # still free.  Slots are never released, so the cursor only
            # ever moves forward and two items can never share a slot.
            while self._next_free in self._index_to_symbol:
                self._next_free += 1
            index = self._next_free
            if index >= self._N:
                raise IndexError('You have been exceeding the UF capacity')

        self._symbol_to_index[p] = index
        self._index_to_symbol[index] = p
        return index

    def find(self, p):
        """Find the set identifier for the item p.

        Raises:
            IndexError: If p is new and the structure is already full.
        """

        i = self._index_of(p)
        parents = self._id
        while i != parents[i]:
            parents[i] = parents[parents[i]]   # Path compression
            i = parents[i]
        return i

    def count(self):
        """Return the number of disjoint sets."""

        return self._count

    def connected(self, p, q):
        """Check if the items p and q are on the same set or not."""

        return self.find(p) == self.find(q)

    def union(self, p, q):
        """Combine sets containing p and q into a single set.

        The pair is also recorded as an undirected edge p--q.

        Raises:
            IndexError: If p or q is new and the structure is already full.
        """

        # Resolve both items first so that a rejected item never leaves a
        # dangling edge behind.
        i = self.find(p)
        j = self.find(q)

        self._edges.setdefault(p, []).append(q)
        self._edges.setdefault(q, []).append(p)

        if i == j:
            return

        parents = self._id
        rank = self._rank

        self._count -= 1
        if rank[i] < rank[j]:
            parents[i] = j
        elif rank[i] > rank[j]:
            parents[j] = i
        else:
            parents[j] = i
            rank[i] += 1

    def find_cycles(self):
        """Report whether the recorded edges contain a cycle.

        Every connected component is explored depth-first.  Meeting an
        already visited vertex through an edge other than the one used to
        reach the current vertex means the graph has a cycle.  Repeating the
        same ``union(p, q)`` call does not count as a cycle; a self-loop
        ``union(p, p)`` does.

        Returns:
            True if a cycle exists, False otherwise.
        """

        edges = self._edges
        no_parent = object()   # cannot be equal to any real vertex
        visited = set()

        for start in edges:
            if start in visited:
                continue
            visited.add(start)
            stack = [(start, no_parent)]
            while stack:
                vertex, parent = stack.pop()
                # set() collapses duplicate recordings of the same edge
                for neighbour in set(edges[vertex]):
                    if neighbour == parent:
                        continue
                    if neighbour in visited:
                        return True
                    visited.add(neighbour)
                    stack.append((neighbour, vertex))
        return False

    def get_components(self):
        """List of symbol components (as sets)"""
        d = defaultdict(set)
        for i, j in enumerate(self._id):
            root = self.find(self._index_to_symbol.get(j, j))
            d[root].add(self._index_to_symbol.get(i, i))
        return list(d.values())

    def __str__(self):
        """String representation of the union find object."""
        return " ".join([str(x) for x in self._id])

    def __repr__(self):
        """Representation of the union find object."""
        return "UF(" + str(self) + ")"

In [66]:

# Three graphs on the vertices 0-5, each loaded into its own UF instance and
# then checked for a cycle.

# case 1 -- contains the cycle 0-1-3-2, so True is expected
#
#     0---1---4
#     |   |
#     |   |
#  5--2---3

# Create a union-find structure with room for the 6 vertices 0-5
uf = UF(6)
uf.union(0, 1)
uf.union(0, 2)
uf.union(1, 3)
uf.union(2, 3)
# the two pendant edges drawn in the diagram
uf.union(1, 4)
uf.union(5, 2)

print("case 1:")
print(uf.find_cycles())


# case 2 -- contains the cycle 0-1-4-2, so True is expected
#
#     0---1---3---5
#     |   |
#     |   |
#     2---4

# Create a union-find structure with room for the 6 vertices 0-5
uf2 = UF(6)
uf2.union(0, 1)
uf2.union(0, 2)
uf2.union(1, 4)
uf2.union(2, 4)
uf2.union(1, 3)
uf2.union(3, 5)

print("case 2:")
print(uf2.find_cycles())


# case 3 -- a simple path, so False is expected
#
#     0---3---2---4---1

# Create a union-find structure with room for the 6 vertices 0-5
uf3 = UF(6)
uf3.union(0, 3)
uf3.union(3, 2)
uf3.union(2, 4)
uf3.union(4, 1)

print("case 3:")
print(uf3.find_cycles())



[0, 1, 2, 3, 4, 5]
case 1:
True
case 2:
True
case 3:
False


2)

Implement Sillysort, with the following pseudocode:

    given an array of digits a of length N

    moving forward along a starting from the second position to the end:
        run _sillysort(a, N)

    define _sillysort(a, start_pos):
        set position = start_pos
        moving backwards along a from start_pos:
            if a[position-1] is greater than a[position]:
                swap a[position-1] and a[position]



In [101]:
def sillysort(a):
    """Sort the list ``a`` in place in ascending order and return it.

    Implements the "Sillysort" pseudocode: one pass is made for every
    position from the second to the last, and every pass starts at the final
    element and walks backwards to the front, swapping each adjacent pair
    that is out of order.  That is ``len(a) - 1`` passes of ``len(a) - 1``
    comparisons each.

    Args:
        a: Mutable sequence of mutually comparable items.

    Returns:
        The same object ``a``, now sorted.
    """
    last = len(a) - 1

    def _sillysort(a, start):
        # Walk from ``start`` down to index 1, pushing smaller items forward.
        for pos in range(start, 0, -1):
            if a[pos - 1] > a[pos]:
                a[pos - 1], a[pos] = a[pos], a[pos - 1]

    # Every pass deliberately restarts from the end, as the pseudocode asks.
    for _ in range(last):
        _sillysort(a, last)
    return a

In [111]:

import random

# Check sillysort against the built-in sort on five random lists of 100 ints.
for _ in range(5):
    a = [random.randrange(0, 101, 1) for _ in range(100)]
    # sillysort works in place, so the reference answer must be taken first
    expected = sorted(a)
    assert sillysort(a) == expected
    # test the student's code here

3) 

Write a function that prints out all the different possible ways you can make change from a list of coins, given the amount N to make change for, the number of types M of infinitely available coins, and list of coin values C where $C=\{C_1,C_2,C_3,..,C_M\}$. You may choose any number of coins as long as their sum is equal to N.

You cannot just use a Cartesian product and provide all combinations of coins whose value equals N. You must provide an answer that can solve for thousands or millions of different types of coins.


In [128]:
from collections import Counter
import copy

def find_combs(N, C):
    """Return every way of making change for ``N`` from the coin values ``C``.

    Each coin value may be used any number of times.  Combinations are built
    with the coins taken in the order they appear in ``C``, so the same
    multiset of coins is never produced twice (provided ``C`` itself holds
    no repeated value).

    Args:
        N: Amount to make change for.
        C: Sequence of positive coin values.

    Returns:
        A list of ``Counter`` objects mapping coin value -> number of coins
        used.  ``N == 0`` yields one empty combination; an amount that cannot
        be formed yields an empty list.

    Raises:
        ValueError: If a coin value is zero or negative, because such a coin
            could be added forever without reaching ``N``.
    """
    if any(coin <= 0 for coin in C):
        raise ValueError("coin values must be positive")

    # store the combos in a local list
    combos = []
    current = Counter()   # the single combination being built and undone

    def gen_combs(first_index, remaining):
        if remaining == 0:
            # snapshot: ``current`` keeps changing while we backtrack
            combos.append(Counter(current))
            return
        # try all possible change choices, never going back to an earlier
        # coin so that each combination is generated exactly once
        for index in range(first_index, len(C)):
            coin = C[index]
            if coin > remaining:
                continue
            current[coin] += 1
            gen_combs(index, remaining - coin)
            current[coin] -= 1
            if not current[coin]:
                del current[coin]   # keep zero counts out of the snapshots

    gen_combs(0, N)
    return combos

In [129]:
# Example: list every way to make change for 5 using coins of value 1, 2 and 5.
N = 5
C = [1, 2, 5]
find_combs(N, C)

[Counter({1: 5}),
 Counter({1: 3, 2: 1}),
 Counter({2: 2, 1: 1}),
 Counter({5: 1})]

4)

Suppose we have a dataset that has 5 features. Why might we use PCA or LDA before performing kNN? 

**The intuition of kNN breaks down as we move beyond 3 dimensions, because distances between points become less informative as the number of features grows. We might be able to recover the utility of the algorithm by reducing the number of features. This can be done by using feature projection (PCA or LDA) to project the data onto a subspace with 3 or fewer dimensions.**