# Concatenation of results
Using the solutions found from Algorithm X, we'll now concatenate the results obtained using different voting methods so that, for each possible set of solutions (for each region and voting method), we can obtain the number of seats that would have been won. This calculation takes some time, so we are doing it here first, which should speed up the calculations in the 'predicted_results.ipynb' notebook.

In these results we will also add in the following: 

1. the island constituencies of 'Isle of Wight', 'Orkney and Shetland', 'Ynys Mon', 'Na h-Eileanan an Iar'
1. the results from the four constituencies in the 'Yorkshire and The Humber' region which had to be entered back in manually because of a circular argument ('Brigg and Goole', 'Scunthorpe', 'Cleethorpes', 'Great Grimsby').

Also note that the Speaker's constituency of 'Chorley' (in the 'North West' region) has not been included as it is always one and makes no difference to the overall result.

In [1]:
import numpy as np
import pandas as pd
import glob
from ast import literal_eval
import re

In [2]:
# Load the merged Webster results from the "_2_", "_3_" and "_4_" files.
# NOTE(review): the next cell reassigns df2h/df3h/df4h for every voting method
# before using them, so this preload looks redundant — confirm before removing.
df2h, df3h, df4h = (
    pd.read_csv(merged_path)
    for merged_path in (
        "../Data/MergedResults/const_merged_2_webster.csv.gz",
        "../Data/MergedResults/const_merged_3_webster.csv.gz",
        "../Data/MergedResults/const_merged_4_webster.csv.gz",
    )
)

In [3]:
# Build, for every (voting method, region, seat-size) combination, a table with
# one row per sampled solution holding the column-wise totals of the merged
# results that the solution selects, then write everything to a single CSV.
files = glob.glob("../Data/SampledSolutions/sampled_solns_*.csv.gz")

# File names have the form 'sampled_solns_<Region_Name>_<seats>.csv.gz'.
# The pattern is anchored on the file name itself so that the directory part
# of the path (its separator style, or any digits in it) cannot affect parsing.
solution_file_pattern = re.compile(r"sampled_solns_(.+)_([0-9]+)\.csv\.gz$")

# The sampled solutions do not depend on the voting method, so each file is
# read and parsed exactly once here instead of once per method.
sampled_solutions = []
for file in files:
    name_match = solution_file_pattern.search(file)
    if name_match is None:
        raise ValueError(f"Cannot parse region and seat count from file name: {file!r}")
    region_key = name_match.group(1)
    seats = int(name_match.group(2))
    if seats not in (2, 3, 4):
        # Only 2-, 3- and 4-seat merged results are loaded below; anything else
        # would previously have reused stale data or raised a NameError.
        raise ValueError(f"Unsupported seat count {seats} in file name: {file!r}")
    # Use literal eval since we can have multiple values there.
    converters = {'soln': literal_eval}
    if seats >= 4:
        converters['triplet'] = literal_eval
    df = pd.read_csv(file, dtype={'region': str}, converters=converters)
    sampled_solutions.append((region_key, seats, df))

predicted_seats_dict = {}
for method in ["dhondt", "webster", "huntington", "imperiali", "hare", "droop"]:
    df2h = pd.read_csv(f"../Data/MergedResults/const_merged_2_{method}.csv.gz")
    df3h = pd.read_csv(f"../Data/MergedResults/const_merged_3_{method}.csv.gz")
    df4h = pd.read_csv(f"../Data/MergedResults/const_merged_4_{method}.csv.gz")
    for region_key, seats, df in sampled_solutions:
        key = method + "_" + region_key + "_" + str(seats)
        region = region_key.replace("_", " ")
        # Maps a 1-based solution counter to that solution's column totals; the
        # entries are concatenated into one DataFrame (one row per solution) below.
        init_dict = {}
        for i, (_, row) in enumerate(df.iterrows(), start=1):
            if seats == 2:
                df_seats = df2h[df2h['set_no'].isin(row['soln'])]
                if 'triplet' in df.columns:
                    df_seats = pd.concat([df_seats, df3h[df3h['set_no'] == row['triplet']]], sort=False)
            elif seats == 3:
                df_seats = df3h[df3h['set_no'].isin(row['soln'])]
                if 'pair' in df.columns:
                    df_seats = pd.concat([df_seats, df2h[df2h['set_no'] == row['pair']]], sort=False)
                if 'quad' in df.columns:
                    df_seats = pd.concat([df_seats, df4h[df4h['set_no'] == row['quad']]], sort=False)
            else:  # seats == 4 (validated when the file name was parsed)
                df_seats = df4h[df4h['set_no'].isin(row['soln'])]
                if 'triplet' in df.columns:
                    # 'triplet' was parsed with literal_eval, so it may be a
                    # list of set numbers or a single value.
                    if isinstance(row['triplet'], list):
                        df_seats = pd.concat([df_seats, df3h[df3h['set_no'].isin(row['triplet'])]], sort=False)
                    else:
                        df_seats = pd.concat([df_seats, df3h[df3h['set_no'] == row['triplet']]], sort=False)
            # Columns missing from one of the concatenated frames become NaN;
            # treat them as zero before totalling each column.
            init_dict[i] = df_seats.fillna(0).apply(sum)
        seat_totals = pd.concat(init_dict, axis=1, sort=False).T
        seat_totals = seat_totals.assign(region=region, seats=seats, method=method)
        # Have to add in the island constituencies which have been kept separate and allocate them on a FPTP method
        if region == "Scotland":
            seat_totals["Liberal Democrat"] += 1
            seat_totals["Scottish National Party"] += 1
        elif region in ("Wales", "South East"):
            seat_totals["Conservative"] += 1
        # In addition have to manually enter back in four seats from 'Yorkshire & the Humber' region due to
        # a circular argument.
        elif region == "Yorkshire and The Humber":
            # When we have a 4 seat grouping it was always split the same way. For fewer than 4 we had a few differences.
            if seats == 4:
                seat_totals["Conservative"] += 3
                seat_totals["Labour"] += 1
            elif method in ('dhondt', 'huntington', 'imperiali', 'droop'):
                seat_totals["Conservative"] += 4
            else:
                seat_totals["Conservative"] += 2
                seat_totals["Labour"] += 2
        # 'set_no' is only an identifier; its column total is meaningless.
        predicted_seats_dict[key] = seat_totals.drop(columns='set_no')
predicted_seats = pd.concat(predicted_seats_dict, sort=False)
predicted_seats = predicted_seats.sort_values(['region', 'seats'])
predicted_seats = predicted_seats.reset_index(drop=True)

predicted_seats.to_csv("../Data/predicted_seats.csv.gz", index=False, compression='gzip')