In [None]:
# Install packages
%pip install networkx
%pip install numpy
%pip install tqdm
%pip install matplotlib
%pip install simanneal 

import sys# Start writing code here...

In [1]:
from starter import *
from anneal import *
import math
import multiprocess as mp
import copy, random
from collections import defaultdict, Counter
import itertools

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class GraphPartitioner(Annealer):
    class StateObject:
        def __init__(self, k):
            self.k = 0
            self.p = [0 for _ in range(k)]
            self.pi = [0 for _ in range(k)]
            self.norm_b = 0
            self.cost_p, self.cost_w, self.cost_k = 0, 0, 0
            self.total_cost = 0
        def get_scores(self):
            return self.cost_w, self.cost_k, self.cost_p, self.total_cost
        
    def __init__(self, Graph: nx.Graph, k):
        
        self.G = Graph
        self.num_v = len(self.G.nodes)
        self.max_p = k
        self.state = self.StateObject(self.max_p)

        ### Below are all state variables
        self.k_way_random_part() # remember to add 1 to pi at the very end so no team is assigned 0
        
        b = [self.get_b_val(i) for i in range(len(self.state.p))]
        self.state.norm_b = np.linalg.norm(b)
        
        self.state.cost_p = math.exp(70 * self.state.norm_b)
        self.compute_cost_w_initial()
        self.compute_cost_k()
        self.state.total_cost = self.state.cost_k + self.state.cost_p + self.state.cost_w

        super(GraphPartitioner, self).__init__(self.state)  # important!
    
    def get_b_val(self, i):
        return self.state.p[i]/self.num_v - 1/self.state.k

    ### Sets up the teams in G and the b vector
    def k_way_random_part(self):
        self.state.pi = np.random.randint(self.max_p, size=len(self.G.nodes)) #  partition=0 is illegal
        self.state.k = len(set(self.state.pi))

        for v in range(self.num_v):
            self.state.p[self.state.pi[v]] += 1

    def compute_cost_k(self):
        self.state.cost_k = 100 * math.exp(self.state.k * 0.5)

    def compute_cost_p_on_swap(self, i, j):
        b_i, b_j = self.get_b_val(i), self.get_b_val(j)
        new_norm_b_squared = self.state.norm_b**2 - b_i**2 - b_j**2 + (b_i-1/self.num_v)**2 + (b_j+1/self.num_v)**2
        
        if math.isclose(new_norm_b_squared, 0, abs_tol=1e-9):
            new_norm_b_squared = 0
        self.state.norm_b = new_norm_b_squared ** 0.5
        self.state.cost_p = math.exp(70 * self.state.norm_b)
    
    def compute_cost_p_from_scratch(self):
        b = [i/self.num_v - 1/self.state.k for i in self.state.p if i != 0]
        self.state.norm_b = np.linalg.norm(b)
        self.state.cost_p = math.exp(70 * self.state.norm_b)
        
    def compute_cost_p(self, new):
        pass

    def compute_cost_w_initial(self):
        for u, v in self.G.edges:
            if self.state.pi[u] == self.state.pi[v]:
                self.state.cost_w += self.G[u][v]['weight']

    def compute_cost_w(self, u, i , j):
        for v in self.G[u]:
            if self.state.pi[v] == i:
                self.state.cost_w -= self.G[u][v]['weight']
            elif self.state.pi[v] == j:
                self.state.cost_w += self.G[u][v]['weight']

    def compute_total_cost(self, v, i, j):
        ### Compute new cost_k if needed
        k_changed = False
        if self.state.p[i] == 1: 
            self.state.k -= 1
            k_changed = True
        if self.state.p[j] == 0:
            self.state.k += 1
            k_changed = True
        if k_changed:
            self.compute_cost_k()
            self.state.p[i] -= 1
            self.state.p[j] += 1
            self.compute_cost_p_from_scratch()
        else:
            self.compute_cost_p_on_swap(i, j)
            self.state.p[i] -= 1
            self.state.p[j] += 1

        self.compute_cost_w(v, i, j)
        
        self.state.pi[v] = j

        self.state.total_cost = self.state.cost_k + self.state.cost_p + self.state.cost_w
        return self.state.total_cost

    def swap_one_vertex(self):
        v = np.random.randint(self.num_v)
        i = self.state.pi[v]
        choices = [i for i in range(self.max_p)if self.state.p[i] != 0]
        if i in choices:
            choices.remove(i)
        j = choices[np.random.randint(self.state.k-1)]

        # j = 0 if i else 1
        self.compute_total_cost(v, i, j)

    def add_one(self, reverse = False):
        for v in self.G.nodes:
            self.state.pi[v] += 1 * (-1) ** reverse
    
    def get_scores(self):
        return self.state.get_scores()

    def energy(self):
        return self.state.total_cost

    def move(self):
        NUM_MOVES = 1
        # for _ in range(NUM_MOVES):
        self.swap_one_vertex()

    def apply_pi(self):
        for v in range(self.num_v):
            self.G.nodes[v]['team'] = self.state.pi[v] + 1
            
    def update(self, step, T, E, acceptance, improvement):
        pass
            
def apply_pi(G, pi):
    num_v = len(G.nodes)
    assert num_v == len(pi)
    max_p = max(pi)

    p = list(Counter(pi))
    p.sort()
    map_p = {p[i]:i for i in range(len(p))}
    for v in range(num_v):
        G.nodes[v]['team'] = int(map_p[pi[v]] + 1)
    return G       

In [13]:
def run(solver, in_file: str, out_file: str, file_name, overwrite,df):
    instance = read_input(in_file)
    print(file_name)
    cost, k, pi, state = solver(instance)
    if not math.isclose(score(instance), cost):
        print(score(instance, separated=True))
        print(state.get_scores())
        return instance, pi, state
        # print(pi)
        # print([instance.nodes[v]['team'] for v in instance.nodes])
        
    r = df.index[df['name'] == file_name]
    overwrite = cost < float(df.iloc[r]['score']) - 10**-4
    # print(cost,float(df.iloc[r]['score']) )
    if overwrite:
        df = pd.read_csv('best_scores.csv')
        df.loc[r, 'score'] = cost
        df.loc[r, 'k'] = k
        df.to_csv('best_scores.csv', index=False)
        print(f"{str(in_file)}: cost", score(instance))
        write_output(instance, out_file, overwrite)

    return pi

def save_and_plot(files, scores, output_name):
    counts, bins = np.histogram(scores)
    plt.stairs(counts, bins)
    
    scores_and_names = list(zip(files, scores))
    scores_and_names.sort()
    cat_size = len(scores)//3
    
    second = lambda x: x[1]
    large = sum(map(second, scores_and_names[:cat_size]))/cat_size
    medium = sum(map(second, scores_and_names[cat_size:cat_size*2]))/cat_size
    small =  sum(map(second, scores_and_names[cat_size*2:]))/cat_size
    
    os.makedirs('scores/' + output_name, exist_ok=True)  
    f = open('scores/' + output_name + '/avg_scores.txt', "w")
    f.write("small_avg: {0} \nmedium_avg: {1} \nlarge_avg: {2}".format(small, medium, large))
    f.close()
    
    df = pd.DataFrame(zip(files, scores), columns=['name', 'score'])
    df.to_csv('scores/' + output_name + '/out.csv')

def run_encapsulate(inp):
    file, solver, in_dir, out_dir,overwrite,df = inp
    r = df.index[df['name'] == file]
    
    err = run(solver, str(Path(in_dir) / file), str(Path(out_dir) / f"{file[:-len('.in')]}.out"), file, overwrite,df)
    if err:
        return err[2]
    return
    
def run_all(solver, in_dir, out_dir, overwrite: bool=False):
    scores = defaultdict()
    files = [x for x in os.listdir(in_dir) if x.endswith('.in')]
    # files.sort(reverse=True)
    random.shuffle(files)
    for file in tqdm(files):
        ### only run if havent done so before
        r = df.index[df['name'] == file]
        if float(df.iloc[r]['score']) >= 200000:
            err = run(solver, str(Path(in_dir) / file), str(Path(out_dir) / f"{file[:-len('.in')]}.out"), file, overwrite)
            if err:
                return err[2]
        pass
    return None

In [4]:
tar('outputs', overwrite = True)

In [14]:
def solve(G: nx.Graph):
    NUM_ITER_OUT = 3
    NUM_ITER_IN = 2
    min_cost = 10**20
    pi = []
    best_k = 0
    best_state= None
    quit = False
    epsilon = 10**-5
    num_v = len(G.nodes)
    size_to_time = {100: 0.1, 300:0.15, 1000:0.21}
    time = size_to_time[num_v]
    
    for k in range(2,20,1):

            # best_state = copy.deepcopy(tourney.state)
        for i in range(NUM_ITER_OUT):
            # tourney = GraphPartitioner(copy.deepcopy(G), k)
            tourney = GraphPartitioner((G), k)
            tourney.set_schedule(tourney.auto(minutes=time))
            cost = tourney.energy()
            if cost < min_cost - epsilon:
                min_cost, pi, best_k = cost, tourney.state.pi.copy(), tourney.state.k
            if cost > 2*min_cost:
                quit = True
            if quit:
                break
            for _ in range(NUM_ITER_IN):
                partitioning, cost = tourney.anneal()
                if cost > 2*min_cost:
                    quit = True
                if quit:
                    break
                if cost < min_cost - epsilon:
                    min_cost, pi, best_k = cost, tourney.state.pi.copy(), tourney.state.k
                    # best_state = copy.deepcopy(tourney.state)
        if quit:
            break
    apply_pi(G, pi)
    return min_cost, best_k, pi, best_state

In [10]:
df = pd.read_csv('best_scores.csv')

# pi = run(solve, 'inputs/small13.in', 'outputs/small13.out', 'small13.in', True,df)

small13.in


In [30]:
G = read_input('inputs/small13.in')
apply_pi(G, pi)
score(G)

2009.5536923187667

In [19]:
def get_low_scoring_inps():
    f = open('low_scores.txt', 'r')
    lines = f.readlines()

    names = []
    for line in lines:
        size, num = line.strip().split(' ')
        names.append(size+num+'.in')
    return names
low_scores = get_low_scoring_inps()
len(low_scores)

49

In [5]:
files = [x for x in os.listdir('inputs') if x.endswith('.in')]

In [17]:
pool = mp.Pool(mp.cpu_count())
df = pd.read_csv('best_scores.csv')

pool.map(run_encapsulate, [(file, solve, 'inputs', 'outputs',True, df) for file in tqdm(low_scores)])

pool.close()

100%|██████████| 115/115 [00:00<00:00, 246345.74it/s]

medium218.inmedium84.in

medium94.in
large120.insmall140.insmall203.in


large211.in
large180.in





medium256.in
inputs/medium256.in: cost 272.8281828459045
large240.in
large216.in
large181.in
inputs/small140.in: cost 1219.2493960703473
small158.in
inputs/large181.in: cost 449.2277266951576
small84.in
small94.in
inputs/medium84.in: cost 5461.205948025516
medium115.in
inputs/small84.in: cost 3313.8538391474267
medium36.in
inputs/medium94.in: cost 46343.87934927351
small174.in
large184.in
inputs/medium115.in: cost 3313.3874872134616
small215.in
small142.in
large23.in
inputs/large211.in: cost 46050.26224646513
small229.in
medium46.in
inputs/small158.in: cost 31842.31591025766
large156.in
medium128.in
inputs/small94.in: cost 28471.14243574018
large60.in
medium16.in
inputs/small229.in: cost 3313.8538391474267
large157.in
inputs/medium128.in: cost 4465.467777460086
inputs/small174.in: cost 94765.90918776828
inputs/medium16.in: cost 12146.30885954619
inputs/small142.in: cost 35349.09830685073
inputs/large156.in: cost 12554.88198445573
inputs/large60.in: cost 12554.881984455735
inputs/large1

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=beec2354-b280-46f2-b093-65afd3bc9b88' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>