## PyPy comparison
This notebook shows the speed-ups obtained using the PyPy kernel. In the original 'merged_constituencies' notebook we had a test which originally took about 200s for the `algo_x` code and about 130s for the `AlgorithmX` code. There were roughly 4.4m solutions.

In [3]:
# import numpy as np
import pandas as pd
import os

from datetime import datetime,timedelta

# Algorithm X codes
from algo_x import *
from AlgorithmX import *

In [6]:
def if_exists_read_csv(filename: str):
    """Load `filename` with `pd.read_csv` when it exists on disk.

    If the path does not exist, a hint pointing at the notebook that
    generates the file is printed and `None` is returned instead of
    raising, so callers must be prepared for a `None` result.
    """
    if os.path.exists(filename):
        return pd.read_csv(filename)

    # Missing input: tell the reader where the file is produced.
    print(f"{filename} does not exist, try running the code in the Analysis/Input/merged_constituencies.ipynb notebook and come back to this notebook when this file has been generaetd.")
    return None

In [7]:
# Load the neighbouring-constituency tables (pairs, triples, quads) produced in the Analysis folder.
# Each name is None if its file is missing (if_exists_read_csv prints a hint in that case).
const_pairs, const_tris, const_quads = map(
    if_exists_read_csv,
    (
        "../Analysis/Data/const_pairs.csv.gz",
        "../Analysis/Data/const_tris.csv.gz",
        "../Analysis/Data/const_quads.csv.gz",
    ),
)

In [10]:
# Preview ten randomly chosen rows; no random_state is given, so the rows differ between runs.
const_quads.sample(n=10)

Unnamed: 0,region,name1,name2,name3,name4,pairing
27283,North West,"Liverpool, Walton",Sefton Central,Southport,Garston and Halewood,27284
3501,East,Harwich and North Essex,South Suffolk,Witham,Saffron Walden,3502
52217,Wales,Merthyr Tydfil and Rhymney,Caerphilly,Cynon Valley,Cardiff South and Penarth,52218
34974,South East,Beaconsfield,Maidenhead,Slough,Spelthorne,34975
49183,South West,North Dorset,Somerton and Frome,Yeovil,Poole,49184
8854,East Midlands,Charnwood,Rutland and Melton,Grantham and Stamford,Gainsborough,8855
17315,London,"Enfield, Southgate",Edmonton,Walthamstow,Hackney North and Stoke Newington,17316
72381,Yorkshire and The Humber,Haltemprice and Howden,Kingston upon Hull West and Hessle,Selby and Ainsty,Doncaster Central,72382
58623,West Midlands,Bromsgrove,Halesowen and Rowley Regis,Solihull,"Birmingham, Erdington",58624
9317,East Midlands,Corby,Rutland and Melton,Rushcliffe,Lincoln,9318


In [None]:
# Quick function running Algorithm X (for exact cover of neighbouring constituencies) and printing the total number of solutions
def all_solns(const_pairs, region, n=None):
    """Count the exact covers of one region's constituencies and print the total.

    Parameters
    ----------
    const_pairs : pandas.DataFrame
        Table of neighbouring pairs; the columns 'region', 'set_no',
        'name1' and 'name2' are read.
    region : str
        Value of the 'region' column to restrict the pairs to.
    n : optional
        Not used by this function; kept so existing calls remain valid.

    Returns
    -------
    None
        The result is reported with `print` only.
    """
    # Filter on the `region` argument. The earlier version read the notebook
    # global `region_name` here, so the parameter was silently ignored.
    df = const_pairs[const_pairs['region'] == region]

    # Candidate subsets keyed by 'set_no': each one is the two constituencies of a pair.
    # A repeated 'set_no' keeps the last pair seen, as before.
    Y = {
        set_no: {name1, name2}
        for set_no, name1, name2 in zip(df['set_no'], df['name1'], df['name2'])
    }

    # ExactCover is expected to come from the `algo_x` star import;
    # it is iterated once and each yielded item is counted as a solution.
    solutions = ExactCover(Y, random = True)
    n_solutions = sum(1 for _ in solutions)

    # Find out how many constituencies there are in the dictionary.
    X = set().union(*Y.values())
    print(f"For the {region} region there are {n_solutions:,} solutions when there are {len(X)} constituencies.")

In [None]:
# Using the algo_x code
region_name = 'South East'
# Pick out one of the 'triplets' (hard-coded row 2648) and drop every pair whose
# name1 or name2 appears among the values of that row.
random_trit = const_tris.iloc[2648]
in_triplet = const_pairs['name1'].isin(random_trit) | const_pairs['name2'].isin(random_trit)
const_pairs2 = const_pairs[~in_triplet]

# Time the full enumeration of solutions.
start = datetime.now()
all_solns(const_pairs2, region_name)
end = datetime.now()
# A timedelta renders as H:MM:SS.ffffff, so convert to seconds to match the "s" suffix.
print(f"The time taken is {(end - start).total_seconds():.1f}s")

In [None]:
# Using AlgorithmX code
# This is the code that is installed when you run "pip install algorithm-x"
region_name = 'South East'
random_tri = const_tris.iloc[2648]
# Both masks use `random_tri`. One of them previously referenced `random_trit`, a name
# that only exists if the algo_x cell has been run first.
in_triplet = const_pairs['name1'].isin(random_tri) | const_pairs['name2'].isin(random_tri)
const_pairs2 = const_pairs[~in_triplet]
const_pairs3 = const_pairs2[const_pairs2['region'] == region_name]

# We need to replace the strings with numbers.
# Sorting makes the name -> number mapping identical on every run (plain set order is not).
const_list = sorted(set(const_pairs3['name1']).union(const_pairs3['name2']))
n = len(const_list)
mapping = {name: number for number, name in enumerate(const_list)}
const_pairs3 = const_pairs3.assign(
    name1=const_pairs3['name1'].map(mapping),
    name2=const_pairs3['name2'].map(mapping),
)

# One solver row per pair, tagged with the pair's 'set_no'.
solver = AlgorithmX(n)
for name1, name2, set_no in zip(const_pairs3['name1'], const_pairs3['name2'], const_pairs3['set_no']):
    solver.appendRow([name1, name2], set_no)

# Only the enumeration of solutions is timed, not the set-up above.
start = datetime.now()
n_solutions = sum(1 for _ in solver.solve())
end = datetime.now()
print(f"For the {region_name} region there are {n_solutions:,} solutions when there are {n} constituencies.")
# A timedelta renders as H:MM:SS.ffffff, so convert to seconds to match the "s" suffix.
print(f"The time taken is {(end - start).total_seconds():.1f}s")

We barely see any speed-up with the `algo_x` code (possibly it's even slower); however, using the `AlgorithmX` code with a PyPy kernel shows a speed-up of about 5-fold compared with that seen before.