# Concatenation of results
Using the solutions found by Algorithm X, we now concatenate our results so that any future analysis is quick. We only use the results from the D'Hondt method, but it should be relatively simple to repeat this for any other method for which we have results.

In these results we also add the following: the island constituencies of 'Isle of Wight', 'Orkney and Shetland', 'Ynys Mon' and 'Na h-Eileanan an Iar', and four constituencies from the 'Yorkshire and The Humber' region ('Brigg and Goole', 'Scunthorpe', 'Cleethorpes', 'Great Grimsby'). Also note that the Speaker's constituency of 'Chorley' (in the 'North West' region) has not been included as it is always one.

In [1]:
import numpy as np
import pandas as pd
import glob
from ast import literal_eval
import re

In [2]:
# Load the D'Hondt merged-constituency results (the const_merged_2/3/4 files).
# These frames are later filtered on their 'set_no' column to pick out the
# rows belonging to each sampled solution.
merged_paths = (
    "../Data/MergedResults/const_merged_2_dhondt.csv.gz",
    "../Data/MergedResults/const_merged_3_dhondt.csv.gz",
    "../Data/MergedResults/const_merged_4_dhondt.csv.gz",
)
df2h, df3h, df4h = (pd.read_csv(path) for path in merged_paths)
# The Webster equivalents are left disabled; only the D'Hondt results are used.
# df2w = pd.read_csv("../Data/MergedResults/const_merged_2_webster.csv.gz")
# df3w = pd.read_csv("../Data/MergedResults/const_merged_3_webster.csv.gz")
# df4w = pd.read_csv("../Data/MergedResults/const_merged_4_webster.csv.gz")

In [3]:
def _manual_seats(region, seats):
    """Return the one-row frame of seats that are added back by hand.

    Some constituencies are kept out of the sampled solutions and have to be
    re-added to every solution of their region: the island constituencies
    (Scotland, Wales, South East) and four seats in 'Yorkshire and The
    Humber'. The row uses the sentinel ``set_no`` 99999.

    Parameters
    ----------
    region : str
        Region name with spaces (e.g. ``"South East"``).
    seats : int
        Merge size of the solution file being processed.

    Returns
    -------
    pandas.DataFrame or None
        A single-row frame of party seat counts, or ``None`` when nothing has
        to be added for this region.
    """
    if region == "Scotland":
        data = {"Liberal Democrat": 1, "Scottish National Party": 1}
    elif region in ("Wales", "South East"):
        data = {"Conservative": 1}
    elif region == "Yorkshire and The Humber":
        # Four seats are entered manually for this region due to a circular
        # argument; their split depends on the merge size.
        if seats < 4:
            data = {"Conservative": 4}
        elif seats == 4:
            data = {"Conservative": 3, "Labour": 1}
        else:
            return None
    else:
        return None
    data["set_no"] = 99999
    return pd.DataFrame(data=data, index=[99999])


# File names look like 'sampled_solns_<Region_Name>_<seats>.csv.gz'. Matching
# on the file name only keeps the parsing independent of the directory part
# (and of the path separator glob returns).
solution_pattern = re.compile(r"sampled_solns_(.+)_([0-9]+)\.csv\.gz$")

files = glob.glob("../Data/SampledSolutions/sampled_solns_*.csv.gz")
predicted_seats_dict = {}
for file in files:
    match = solution_pattern.search(file)
    if match is None:
        raise ValueError("Cannot parse region and seats from file name: " + file)
    seats = int(match.group(2))
    if seats not in (2, 3, 4):
        # Only merged results for 2, 3 and 4 seats are loaded (df2h/df3h/df4h).
        raise ValueError("Unsupported number of seats in file name: " + file)
    region = match.group(1)
    key = region + "_" + str(seats)
    region = region.replace("_", " ")

    # 'soln' holds a Python literal (a collection of set numbers); for the
    # 4-seat files the 'triplet' column is parsed the same way.
    converters = {'soln': literal_eval}
    if seats == 4:
        converters['triplet'] = literal_eval
    df = pd.read_csv(file, dtype={'region': str}, converters=converters)

    # The manual seats only depend on the region and the merge size, so the
    # frame is built once per file rather than once per sampled solution.
    manual = _manual_seats(region, seats)

    # Seat totals per sampled solution, keyed by a 1-based running number.
    init_dict = {}
    for i, (_, row) in enumerate(df.iterrows(), start=1):
        if seats == 2:
            df_seats = df2h[df2h['set_no'].isin(row['soln'])]
            if 'triplet' in df.columns:
                df_seats = pd.concat([df_seats, df3h[df3h['set_no'] == row['triplet']]], sort=False)
        elif seats == 3:
            df_seats = df3h[df3h['set_no'].isin(row['soln'])]
            if 'pair' in df.columns:
                df_seats = pd.concat([df_seats, df2h[df2h['set_no'] == row['pair']]], sort=False)
            if 'quad' in df.columns:
                df_seats = pd.concat([df_seats, df4h[df4h['set_no'] == row['quad']]], sort=False)
        else:  # seats == 4
            df_seats = df4h[df4h['set_no'].isin(row['soln'])]
            if 'triplet' in df.columns:
                # 'triplet' may hold either a list of set numbers or a single one.
                if isinstance(row['triplet'], list):
                    df_seats = pd.concat([df_seats, df3h[df3h['set_no'].isin(row['triplet'])]], sort=False)
                else:
                    df_seats = pd.concat([df_seats, df3h[df3h['set_no'] == row['triplet']]], sort=False)

        # Add back the constituencies that were kept separate. Testing the
        # frame itself (instead of a second list of region names) keeps this
        # in step with _manual_seats, so Wales is no longer skipped.
        if manual is not None:
            df_seats = pd.concat([df_seats, manual], sort=False).fillna(0)

        init_dict[i] = df_seats.apply(sum)

    # One row per sampled solution, labelled with its region and merge size.
    region_seats = pd.concat(init_dict, axis=1, sort=False).T
    region_seats = region_seats.assign(region=region, seats=seats)
    predicted_seats_dict[key] = region_seats.drop(columns='set_no')

predicted_seats = pd.concat(predicted_seats_dict, sort=False)
predicted_seats = predicted_seats.sort_values(['region', 'seats'])
predicted_seats = predicted_seats.reset_index(drop=True)

In [4]:
# Write the concatenated seat predictions to a gzip-compressed CSV, leaving
# out the row index.
output_path = "../Data/predicted_seats.csv.gz"
predicted_seats.to_csv(output_path, compression='gzip', index=False)