In [None]:
# Install packages
%pip install networkx
%pip install numpy
%pip install tqdm
%pip install matplotlib
%pip install simanneal 

import sys# Start writing code here...

In [49]:
from starter import *
from anneal import *
import math
import multiprocess as mp
import copy, random
from collections import defaultdict, Counter
import itertools

In [50]:
class GraphPartitioner(Annealer):
    class StateObject:
        def __init__(self, k):
            self.k = 0
            self.p = [0 for _ in range(k)]
            self.pi = [0 for _ in range(k)]
            self.norm_b = 0
            self.cost_p, self.cost_w, self.cost_k = 0, 0, 0
            self.total_cost = 0
        def get_scores(self):
            return self.cost_w, self.cost_k, self.cost_p, self.total_cost
        
    def __init__(self, Graph: nx.Graph, k):
        
        self.G = Graph
        self.num_v = len(self.G.nodes)
        self.max_p = k
        self.state = self.StateObject(self.max_p)

        ### Below are all state variables
        self.k_way_random_part() # remember to add 1 to pi at the very end so no team is assigned 0
        
        b = [self.get_b_val(i) for i in range(len(self.state.p))]
        self.state.norm_b = np.linalg.norm(b)
        
        self.state.cost_p = math.exp(70 * self.state.norm_b)
        self.compute_cost_w_initial()
        self.compute_cost_k()
        self.state.total_cost = self.state.cost_k + self.state.cost_p + self.state.cost_w

        super(GraphPartitioner, self).__init__(self.state)  # important!
    
    def get_b_val(self, i):
        return self.state.p[i]/self.num_v - 1/self.state.k

    ### Sets up the teams in G and the b vector
    def k_way_random_part(self):
        self.state.pi = np.random.randint(self.max_p, size=len(self.G.nodes)) #  partition=0 is illegal
        self.state.k = len(set(self.state.pi))

        for v in range(self.num_v):
            self.state.p[self.state.pi[v]] += 1

    def compute_cost_k(self):
        self.state.cost_k = 100 * math.exp(self.state.k * 0.5)

    def compute_cost_p_on_swap(self, i, j):
        b_i, b_j = self.get_b_val(i), self.get_b_val(j)
        new_norm_b_squared = self.state.norm_b**2 - b_i**2 - b_j**2 + (b_i-1/self.num_v)**2 + (b_j+1/self.num_v)**2
        
        if math.isclose(new_norm_b_squared, 0, abs_tol=1e-9):
            new_norm_b_squared = 0
        self.state.norm_b = new_norm_b_squared ** 0.5
        self.state.cost_p = math.exp(70 * self.state.norm_b)
    
    def compute_cost_p_from_scratch(self):
        b = [i/self.num_v - 1/self.state.k for i in self.state.p if i != 0]
        self.state.norm_b = np.linalg.norm(b)
        self.state.cost_p = math.exp(70 * self.state.norm_b)
        
    def compute_cost_p(self, new):
        pass

    def compute_cost_w_initial(self):
        for u, v in self.G.edges:
            if self.state.pi[u] == self.state.pi[v]:
                self.state.cost_w += self.G[u][v]['weight']

    def compute_cost_w(self, u, i , j):
        for v in self.G[u]:
            if self.state.pi[v] == i:
                self.state.cost_w -= self.G[u][v]['weight']
            elif self.state.pi[v] == j:
                self.state.cost_w += self.G[u][v]['weight']

    def compute_total_cost(self, v, i, j):
        ### Compute new cost_k if needed
        k_changed = False
        if self.state.p[i] == 1: 
            self.state.k -= 1
            k_changed = True
        if self.state.p[j] == 0:
            self.state.k += 1
            k_changed = True
        if k_changed:
            self.compute_cost_k()
            self.state.p[i] -= 1
            self.state.p[j] += 1
            self.compute_cost_p_from_scratch()
        else:
            self.compute_cost_p_on_swap(i, j)
            self.state.p[i] -= 1
            self.state.p[j] += 1

        self.compute_cost_w(v, i, j)
        
        self.state.pi[v] = j

        self.state.total_cost = self.state.cost_k + self.state.cost_p + self.state.cost_w
        return self.state.total_cost

    def swap_one_vertex(self):
        v = np.random.randint(self.num_v)
        i = self.state.pi[v]
        choices = [i for i in range(self.max_p)if self.state.p[i] != 0]
        if i in choices:
            choices.remove(i)
        j = choices[np.random.randint(self.state.k-1)]

        # j = 0 if i else 1
        self.compute_total_cost(v, i, j)

    def add_one(self, reverse = False):
        for v in self.G.nodes:
            self.state.pi[v] += 1 * (-1) ** reverse
    
    def get_scores(self):
        return self.state.get_scores()

    def energy(self):
        return self.state.total_cost

    def move(self):
        NUM_MOVES = 1
        # for _ in range(NUM_MOVES):
        self.swap_one_vertex()

    def apply_pi(self):
        for v in range(self.num_v):
            self.G.nodes[v]['team'] = self.state.pi[v] + 1
            
    def update(self, step, T, E, acceptance, improvement):
        pass
            
def apply_pi(G, pi):
    num_v = len(G.nodes)
    assert num_v == len(pi)
    max_p = max(pi)

    p = list(Counter(pi))
    p.sort()
    map_p = {p[i]:i for i in range(len(p))}
    for v in range(num_v):
        G.nodes[v]['team'] = int(map_p[pi[v]] + 1)
    return G       

In [75]:
def run(solver, in_file: str, out_file: str, file_name, overwrite,df,target,target_k=None):
    instance = read_input(in_file)
    
    cost, k, pi, state = solver(instance,in_file, out_file, file_name,target,target_k)
    if not math.isclose(score(instance), cost):
        print(score(instance, separated=True))
        print(state.get_scores())
        return instance, pi, state
        # print(pi)
        # print([instance.nodes[v]['team'] for v in instance.nodes])
        
    r = df.index[df['name'] == file_name]
    overwrite = cost < float(df.iloc[r]['score']) - 10**-4
    # print(cost,float(df.iloc[r]['score']) )
    if overwrite:
        write(instance, in_file, out_file, file_name, cost, k, overwrite)
        
    return pi

def save_and_plot(files, scores, output_name):
    counts, bins = np.histogram(scores)
    plt.stairs(counts, bins)
    
    scores_and_names = list(zip(files, scores))
    scores_and_names.sort()
    cat_size = len(scores)//3
    
    second = lambda x: x[1]
    large = sum(map(second, scores_and_names[:cat_size]))/cat_size
    medium = sum(map(second, scores_and_names[cat_size:cat_size*2]))/cat_size
    small =  sum(map(second, scores_and_names[cat_size*2:]))/cat_size
    
    os.makedirs('scores/' + output_name, exist_ok=True)  
    f = open('scores/' + output_name + '/avg_scores.txt', "w")
    f.write("small_avg: {0} \nmedium_avg: {1} \nlarge_avg: {2}".format(small, medium, large))
    f.close()
    
    df = pd.DataFrame(zip(files, scores), columns=['name', 'score'])
    df.to_csv('scores/' + output_name + '/out.csv')

def run_encapsulate(inp):
    file, solver, in_dir, out_dir,overwrite,df,target,target_k = inp
    r = df.index[df['name'] == file]
    
    err = run(solver, str(Path(in_dir) / file), str(Path(out_dir) / f"{file[:-len('.in')]}.out"), file, overwrite,df,target,target_k)
    if err:
        return err[2]
    return
    
def run_all(solver, in_dir, out_dir, overwrite: bool=False):
    scores = defaultdict()
    files = [x for x in os.listdir(in_dir) if x.endswith('.in')]
    # files.sort(reverse=True)
    random.shuffle(files)
    for file in tqdm(files):
        ### only run if havent done so before
        r = df.index[df['name'] == file]
        if float(df.iloc[r]['score']) >= 200000:
            err = run(solver, str(Path(in_dir) / file), str(Path(out_dir) / f"{file[:-len('.in')]}.out"), file, overwrite)
            if err:
                return err[2]
        pass
    return None

def write(instance, in_file, out_file, file_name, cost, k, overwrite):
    df = pd.read_csv('best_scores.csv')
    r = df.index[df['name'] == file_name]
    df.loc[r, 'score'] = cost
    df.loc[r, 'k'] = k
    df.to_csv('best_scores.csv', index=False)
    print(f"{str(in_file)}: cost", score(instance))
    write_output(instance, out_file, overwrite)

In [52]:
tar('outputs', overwrite = True)

In [80]:
def solve(G: nx.Graph,in_file, out_file, file_name, target=1,target_k=None):
    NUM_ITER_OUT,NUM_ITER_IN = 5,20
    df = pd.read_csv('best_scores.csv')
    r = df.index[df['name'] == file_name]
    min_cost = float(df.iloc[r]['score'])
    
    num_p = target_k if target_k else int(df.iloc[r]['k'])
    print(file_name, target, num_p)

    pi = []
    best_k = 0
    best_state= None
    epsilon = 10**-5
    num_v = len(G.nodes)
    size_to_time = {100:.5, 300:.5, 1000:.5}
    time = size_to_time[num_v]
    found = False
    for k in range(num_p,num_p+1,1):
        for i in range(NUM_ITER_OUT):
            found =  min_cost <= target
            if found: break
            
            tourney = GraphPartitioner((G), k)
            tourney.set_schedule(tourney.auto(minutes=time, steps=1000))
            cost = tourney.energy()
            if cost < min_cost - epsilon:
                min_cost, pi, best_k = cost, tourney.state.pi.copy(), tourney.state.k
                apply_pi(G, pi)
                write(G, in_file, out_file, file_name, cost, k, True)
            
            

            for _ in range(NUM_ITER_IN):
                found =  min_cost <= target
                if found: break
                partitioning, cost = tourney.anneal()

                if cost < min_cost - epsilon:
                    min_cost, pi, best_k = cost, tourney.state.pi.copy(), tourney.state.k

                    apply_pi(G, pi)
                    write(G, in_file, out_file, file_name, cost, k, True)
                
                
            
        if found:
            return min_cost, best_k, pi, best_state
        print()

    apply_pi(G, pi)
    return min_cost, best_k, pi, best_state

In [81]:
df = pd.read_csv('best_scores.csv')

# pi = run(solve, 'inputs/small13.in', 'outputs/small13.out', 'small13.in', True,df)

In [None]:
name = 'small59'
df = pd.read_csv('best_scores.csv')
_ = run(solve, 'inputs/'+name+'.in', 'outputs/' +name+ '.out', name + '.in', True, df, 402518.3688118974)

In [15]:
r = df.index[df['name'] == 'large100.in']
int(df.iloc[r]['k'])
# G = read_input('inputs/small232.in')
# pi = [7, 4, 5, 9, 10, 1, 6, 9, 2, 5, 2, 11, 2, 11, 6, 1, 6, 2, 6, 11, 5, 5, 5, 3, 3, 3, 11, 1, 1, 3, 8, 9, 2, 10, 7, 7, 10, 7, 3, 8, 10, 3, 8, 4, 10, 4, 8, 1, 5, 6, 7, 7, 10, 2, 9, 11, 8, 4, 10, 1, 4, 9, 6, 5, 1, 1, 2, 6, 2, 8, 3, 7, 7, 7, 5, 4, 1, 5, 8, 6, 9, 3, 4, 3, 8, 4, 2, 11, 4, 10, 6, 8, 11, 10, 6, 9, 11, 9, 9, 11]
# apply_pi(G, pi)
# score(G)

3

In [78]:
def get_low_scoring_inps():
    f = open('low_scores.txt', 'r')
    f2 = open('k.txt', 'r')

    lines = f.readlines()
    lines_k = f2.readlines()
    names = []
    for line,line2 in zip(lines,lines_k):
        size, num, target = line.strip().split()
        k = line2.strip()
        if k: k = int(k)
        names.append((size+num+'.in',float(target),k))
    return names
low_scores = get_low_scoring_inps()

In [5]:
files = [x for x in os.listdir('inputs') if x.endswith('.in')]

In [82]:
pool = mp.Pool(mp.cpu_count())
df = pd.read_csv('best_scores.csv')

pool.map(run_encapsulate, [(file, solve, 'inputs', 'outputs',True, df,target,k) for file,target,k in tqdm(low_scores)])

pool.close()

100%|██████████| 30/30 [00:00<00:00, 12852.82it/s]

small153.inlarge84.insmall59.inlarge216.inmedium47.in    large94.in402518.0 24470.3402518.0280.0large47.in  24470.3  12 24470.3 11 12
 211

24470.3large160.in11

 
 14842.4 10
11






large129.in 14842.4 10









KeyboardInterrupt: 

small158.in 24471.2 11
small58.in 3313.9 

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=beec2354-b280-46f2-b093-65afd3bc9b88' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>