## Accomplished in this code:
1. Batches running without CUDA as concept verification. 

## Idea behind batches
 * We noticed that the size of the array sent down to the GPU was too large for global memory and could only think of two ways around it
    1. Create a device function in cuda that would generate the sub-graphs to check. I couldn't think of a way to do it and neither could Dr. Cook so this idea has been put on hold.
    1. Send down the sub-graphs in batches. I.e. the array of sub-graphs needed to check for a 40-vertice big-graph and R(3,10) is 10 times too big to fit. So, we're going to send down one tenth of it, then the next tenth and etc til we check them all. This is very slow compared to what we were doing but some optimization of memory transfer should help.

In [23]:
from setup import *

Ramsey = [3,5]
num_vertices = 15
num_threads = 5
beta = 1
num_steps = 10**5
main(Ramsey, num_vertices, num_threads, beta, num_steps)

(2,)
2 colors


NameError: name 'find_problems' is not defined

In [22]:
def main(Ramsey, num_vertices, num_threads, beta, num_steps):

    def choose(n,k):
        return int(math.factorial(n)/math.factorial(k)/math.factorial(n-k))
    
    def min_red_edges(Ramsey, num_vertices):
        sorted_Ramsey = np.sort(Ramsey)
        k = sorted_Ramsey[1]
        if len(sorted_Ramsey) != 2 or sorted_Ramsey[0] != 3:
            return(0)
        elif num_vertices <=2*k:
            return(num_vertices-k)
        elif num_vertices <= 5*k/2:
            return(3*num_vertices - 5*k)
        else:
            return(5*num_vertices - 10*k)
    Ramsey.sort()
    Ramsey = Ramsey[::-1]
    num_colors = np.int(len(Ramsey))
    Colors = np.arange(num_colors).astype("int")
    Colors = np.asarray([int(Colors[i]) for i in range(len(Colors))]).astype("int")
    Vertices = np.arange(num_vertices).astype("int")
    Edges = list(it.combinations(Vertices,2))
    #reverse lookup for edges below.  Eg if slot 3 above contains edge (2,5), the the dict below has entry (2,5):3
    Edges_idx = dict((edge, idx) for idx,edge in enumerate(Edges)) 
    num_edges = len(Edges)
    max_bytes = 4.2*10**9
    #This is based off of the max global memory found in Lannister. It is just under 4.3 gigs. So I'm only allowing 
    #4.2 gigs of memory to used up with edges of cliques to check

    #This code evenly distributes work to threads so that they all finish at about the same time
    #An int is 4 bytes. We should bring this down sometime in the future to an unsigned short for memory reasons
    size_of_edge = 4
    vertices_per_clique = Ramsey
    edges_per_clique = [choose(v,2) for v in vertices_per_clique] 
    bytes_per_clique = size_of_edge*edges_per_clique
    cliques_per_color = np.asarray([choose(num_vertices,v) for v in vertices_per_clique])
    edges_per_color = edges_per_clique * cliques_per_color
    bytes_per_color = size_of_edge*edges_per_color
    total_bytes = bytes_per_color.sum()
    computations_per_color = edges_per_color / edges_per_color.sum()
    num_batches = np.int(2)#np.ceil(total_bytes/max_bytes)
    
    threads_per_color = np.ones(num_colors).astype('int')
    remaining_threads = num_threads - threads_per_color.sum()
    threads_per_color += np.floor(remaining_threads * computations_per_color).astype('int')
    
    cliques_per_thread = np.ceil(cliques_per_color / threads_per_color).astype('int')
    
    cliques_per_batch = np.ceil(cliques_per_thread/num_batches).astype("int")

    assign_Threads_to_Cliques = [-1*np.ones([threads_per_color[color],
                                             cliques_per_batch[color],
                                             edges_per_clique[color]],
                                            dtype='int')
                                for color in Colors]
    #assign_batches assigns the correct cliques based on the batch number that the algorithm is on. This method
    #makes it easy to just update the batch number 
    def assign_batches():
        for color, clique_size in enumerate(Ramsey):        
            num_cliques = cliques_per_batch[color]
            num_threads = threads_per_color[color]
            #We're sending batches down to the GPU. I.e. if we're sending down the second batch of three, we're 
            #making a generator that returns [clique-1,clique-4,clique-7,...]. Recall, Cliques is a generator that
            #returns [clique-0,clique-1,clique-2,...].
            Cliques = it.combinations(Vertices,clique_size)
            Batches = it.islice(Cliques, batch_num, None, num_batches)
            #_ = [next(Cliques) for i in range(np.int32(batch_num*num_cliques))]
            #Makes the vector [0,1,2,...,num_threads-1,0,1,2,...,num_threads-1,....] of length num_cliques        
            assign_Cliques_to_Threads = np.arange(num_cliques) % num_threads
            #randomizes assignment, but maintains clique counts
            np.random.shuffle(assign_Cliques_to_Threads)
            #A vector that knows where the next open slot on each thread is
            next_open_slot = np.zeros(num_threads,dtype='int')
            for clique_Vertices, thread in zip(Batches,assign_Cliques_to_Threads):
                #Gets the list of edges in this clique
                clique_Edges = list(it.combinations(clique_Vertices,2))
                #Converts it to edge_idx
                clique_Edges_idx = [Edges_idx[edge] for edge in clique_Edges]
                #print(clique_Edges_idx)
                #Writes it to the correct thread and next open slot on that thread
                assign_Threads_to_Cliques[color][thread,next_open_slot[thread]] = clique_Edges_idx
                next_open_slot[thread] += 1


    #print("edges per clique");print(edges_per_clique);print("cliques per color");print(cliques_per_color);print("edges per color");print(edges_per_color);print("threads per color");print(threads_per_color)
    """
    for color in Colors:        
        print("color");print(color);print("threads per color");print(threads_per_color[color]);print("cliques per thread");print(cliques_per_thread[color]);print("edges per clique");print(edges_per_clique[color])    
        #for thread in range(threads_per_color[color]):
        #    display(assign_Threads_to_Cliques[color][thread])
        display(np.array(assign_Threads_to_Cliques[color]))#[thread]))
    """
    #This function takes the Problems array and the num_problems_total and returns both of them. The reason for 
    #this is that the comparing technique used to compare the find_problems_cuda function and the find_problems_pandas 
    #function makes sure that both of algorithms identify the exact same problems 
    #def find_problems(coloring, printout=False):
    #    num_problems_total = 0
    #    for color in Colors:
    #        X = coloring[assign_Threads_to_Cliques[color]]        
    #        Problems[batch_num][color] = np.all(X == color,axis=2)
    #        num_problems_total += Problems[color].sum()
    #        if printout == True:
    #            print(color)
    #            XF = pd.DataFrame(X.reshape(-1, X.shape[-1]))
    #            ProblemsF = Problems[color].reshape(-1)
    #            XF['problem'] = ProblemsF
    #            display(XF)
    #            display(XF[ProblemsF])
    #    return num_problems_total, Problems
    
    compare = np.full_like(assign_Threads_to_Cliques, fill_value=num_colors)
    print(np.array(assign_Threads_to_Cliques).shape)
    #for block in range(num_blocks):
    #    compare[block,:,:block_edges_per_clique[block]] = block_color[block]
    
    def find_problems_pandas(coloring, printout=False):
        for i in range(num_batches):
            batch_num = i
            assign_batches()
            num_problems_,current = find_problems(Problems_current, num_problems_per_batch_current)
        X = coloring[assign_Threads_to_Cliques]
        Y = (X == compare)
        Problems = np.all(Y,axis=-1)
        if printout == True:
            print_problems(Problems)
        return Problems.sum().astype('int'), Problems
    
    if num_colors == 2:
        print("2 colors")
        def flip():        
            coloring[edge_idx] += 1
            coloring[edge_idx] %= 2        
            return
    else:
        print("more than 2 colors")
        nc = num_colors-1
        def flip():
            new_color = np.random.randint(0,nc)
            if new_color >= coloring[edge_idx]:
                new_color += 1
            coloring[edge_idx] = new_color
            return
        
    def increase_red_edges():
        while(list(coloring_cpu).count(red)<min_red):
            idx = [i for i in range(len(coloring_cpu)) if coloring_cpu[i]!=red]
            coloring_cpu[random.choice(idx)] = red
            
    coloring = np.random.choice(Colors, size=num_edges+1, replace=True)
    coloring[-1] = 1 + Colors[-1]
    #They are this shape because it enables us to use a simple and quick algorithm inside find_problems
    Problems_current, Problems_proposed = 0*assign_Threads_to_Cliques, 0*assign_Threads_to_Cliques
    #These arrays keep track of the number of problems in a batch. So, if there are three batches it will look like:
    #[4,13,8] where [problems in batch 1, problems in batch 2, problems in batch 3]
    num_problems_current = np.array(np.zeros(num_batches)).astype("int")#np.zeros(num_colors*num_batches),(num_batches,num_colors)).astype('int')
    num_problems_proposed = num_problems_current
    
    num_problems_best = num_problems_current
    coloring_best = coloring
    
    Problems_current =  [np.zeros(assign_Threads_to_Cliques[color].shape[:-1]).astype('bool') for color in Colors]
    #num_problems_per_color_current = np.zeros(num_colors).astype('int')
    Problems_proposed = [np.zeros(assign_Threads_to_Cliques[color].shape[:-1]).astype('bool') for color in Colors]
    #num_problems_per_color_proposed = np.zeros(num_colors).astype('int')
    #For each batch, figure out the number of problems
    num_problems_current = find_problems_pandas(coloring)
    
    #for i in range(num_batches):
    #    batch_num = i
    #    assign_batches()
    #    num_problems_per_batch_current = find_problems(Problems_current, num_problems_per_batch_current)#, printout=True)
    step = -1
    #print(step,num_problems_per_batch_current)#,num_problems_current)
    for step in range(num_steps):
        #print()
        #this code moves to the next batch under two conditions: One, if the current batch has no problems and 
        #Two, if the number of steps determined for update have been stepped
        #if step%steps_between_swaps ==0:
        #    If step_between_batches is 10 and num_batches = 3, then, the process starts with batch_num = 0 on step 0,
        #    then, on step 10, batch_num turns to 1 and, on step 20, batch_num = 2 and, on step 30, batch_num = 0 and etc. 
        #    batch_num = np.int32(step%num_batches)
        #    assign_batches()
        #elif num_problems_per_batch_current[batch_num]==0:
        #    batch_num = (batch_num+1)%num_batches
        #    assign_batches()
        if num_problems_best == 0:
            break
        edge_idx = np.random.randint(num_edges)
        edge_color_current = coloring[edge_idx]    
        flip()
        num_problems_proposed = find_problems_pandas(coloring)#, printout=True)
        num_problems_diff = np.sum(num_problems_current - num_problems_proposed)
        #print(num_problems_diff)
        if num_problems_diff <= 0:            
            num_problems_current = num_problems_proposed
            if num_problems_proposed < num_problems_best:
                num_problems_best = num_problems_proposed
                coloring_best = coloring
        else:            
            accept = np.exp(beta * num_problems_diff)            
            r = np.random.random()
            #print("Proposed is worse")
            #print("But I will accept it anyway I draw a number less than %.3f.  I drew %.3f." % (accept,r))            
            if r <= accept:
                #accept the move
                
                #print("So I accept the move even though it is worse.")
                num_problems_current = num_problems_proposed
                
                #num_problems_per_batch_current[batch_num] = num_problems_per_currentproposed[batch_num]
            else:
                #reject the move
                
                #print("So I reject.")
                
                coloring[edge_idx] = edge_color_current
        
        #print("Step: ",step,"Batch: ", batch_num,"Problems: ",num_problems_per_batch_current)#coloring[:-1],)#,num_problems_current)
        
        #If we ever get to the point where there appears to be no problems, this codes saves the current spot batch
        #number that we're on then double checks the entire sub-graph. It does this because it is possible for one batch
        #to be optimized to zero problems and then the optimization of another batch creates one in the batch
        #if num_problems_best.sum() == 0:
        #    for batch in range(num_batches):
        #        batch_num = batch
        #        assign_batches()
        #        num_problems_per_batch_current = find_problems(Problems_current, num_problems_per_batch_current)
        #   if num_problems_per_batch_current.sum() == 0:
        #        break
    #for batch in range(num_batches):
    #    batch_num = batch
    #    assign_batches()
    #    num_problems_per_batch_current = find_problems(Problems_current, num_problems_per_batch_current)
    coloring = coloring_best
    num_problems = find_problems_pandas(coloring)
    return coloring, num_problems

In [6]:
#Example of how the batches work. Note, the first batch has a batch_num of zero and why this is so should be obvious
#from the following code.
sequence = (i for i in range(10**5))
batch_num = 1
num_batches = 3
batches = it.islice(sequence, batch_num, None, num_batches)
for j in range(10):
    print(next(batches))

1
4
7
10
13
16
19
22
25
28
