In [8]:
import xml.etree.ElementTree as ET
from pathlib import Path
import datetime as dt
import xmltodict
import pandas as pd
import os
import zipfile
import shutil

def read_eml(path: Path):
    """Parse the EML file at ``path`` into a dictionary via ``xmltodict``.

    The file is read as UTF-8 text. If a ``UnicodeDecodeError`` is raised,
    the file name and the error are printed and ``None`` is returned instead
    of propagating the exception.
    """
    try:
        raw_xml = path.read_text(encoding="utf-8")
        parsed = xmltodict.parse(raw_xml)
    except UnicodeDecodeError as decode_error:
        # Best-effort: report which file could not be decoded and carry on.
        print(path.name)
        print(decode_error)
        return None
    return parsed
    
# Record the current working directory (not the home directory); the next
# cell joins this with the zip file name to locate the downloaded archive.
current_directory = os.getcwd()

In [9]:
# PREREQUISITE: the zip file named below must already be in the working
# directory recorded in `current_directory` by the previous cell.

zip_filename = 'aec-mediafeed-Detailed-Verbose-29581-20231012180242.zip'

# Construct the full path to the zip file
zip_file_path = os.path.join(current_directory, zip_filename)

# The archive is extracted into a sibling directory named after the zip file
# (extension stripped).
unzip_directory_name = os.path.splitext(zip_filename)[0]
unzip_directory_path = os.path.join(current_directory, unzip_directory_name)

# Create a ZipFile object and extract the contents
with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    zip_ref.extractall(unzip_directory_path)

print('Unzipped to: ' + unzip_directory_name)
print('Now filtering to spreadsheet ...')

# Work with explicit paths instead of os.chdir(): the kernel's working
# directory and `current_directory` are left untouched, so this cell can be
# re-run (and can fail part-way) without breaking the path logic above.
xml_directory = os.path.join(unzip_directory_path, 'xml')

# os.listdir() order is arbitrary, so sort and keep only XML files to make the
# choice of input file deterministic.
xml_file_names = sorted(
    name for name in os.listdir(xml_directory) if name.lower().endswith('.xml')
)
if not xml_file_names:
    raise FileNotFoundError('No XML file found in ' + xml_directory)
file_name = xml_file_names[0]

with open(os.path.join(xml_directory, file_name), encoding='utf-8') as xml_file:
    text = xml_file.read()
temp = xmltodict.parse(text)

print('File opened...')


def _share(count, total):
    """Return ``count / total``, or the 'NULL' placeholder when ``total`` is zero."""
    return count / total if total else 'NULL'


def _polling_district_record(district):
    """Flatten one parsed ``PollingDistrict`` element into a single table row.

    ``*_pc_tot`` columns are fractions of the district's enrolment;
    ``*_pc_so_far`` columns are fractions of the votes returned so far.
    Either is 'NULL' when its denominator is zero.
    """
    identifier = district['PollingDistrictIdentifier']
    results = district['ProposalResults']

    enrolment = float(district['Enrolment']['#text'])
    # NOTE(review): assumes the first Option is "Yes" and the second is "No",
    # as the original code did -- confirm against the feed's Option identifiers.
    yes_votes = float(results['Option'][0]['Votes']['#text'])
    no_votes = float(results['Option'][1]['Votes']['#text'])
    informal_votes = float(results['Informal']['Votes']['#text'])
    votes_returned = float(results['Total']['Votes']['#text'])

    return {
        'Seat': identifier['Name'],
        'SeatID': identifier['@Id'],
        'State': identifier['StateIdentifier']['@Id'],
        'Enrolment': enrolment,
        'Yes': yes_votes,
        'No': no_votes,
        'Informal': informal_votes,
        # A district is complete once every expected polling place has returned.
        'Complete': results['@PollingPlacesExpected'] == results['@PollingPlacesReturned'],
        'Votes_returned': votes_returned,
        'Votes_returned_pc': float(results['Total']['Votes']['@Percentage']),
        'Informal_pc_tot': _share(informal_votes, enrolment),
        'Yes_pc_tot': _share(yes_votes, enrolment),
        'No_pc_tot': _share(no_votes, enrolment),
        'Informal_pc_so_far': _share(informal_votes, votes_returned),
        # These two were previously written back into Yes_pc_tot / No_pc_tot,
        # silently overwriting the share-of-enrolment figures.
        'Yes_pc_so_far': _share(yes_votes, votes_returned),
        'No_pc_so_far': _share(no_votes, votes_returned),
    }


polling_districts = temp['MediaFeed']['Results']['Election']['Referendum']['Contests']['Contest']['PollingDistricts']['PollingDistrict']
# xmltodict yields a single mapping (not a list) when there is only one child element.
if isinstance(polling_districts, dict):
    polling_districts = [polling_districts]

# Build all rows first and construct the frame once, rather than growing it
# with pd.concat on every iteration.
df = pd.DataFrame([_polling_district_record(district) for district in polling_districts])

df = df.sort_values(by='Votes_returned_pc', ascending=False)
print(df)

Unzipped to: aec-mediafeed-Detailed-Verbose-29581-20231012180242
Now filtering to spreadsheet ...
File opened...
            Seat SeatID State  Enrolment     Yes      No  Informal  Complete  \
22          Hume    125   NSW   126270.0  3264.0  3753.0      89.0     False   
15   Eden-Monaro    117   NSW   117428.0  1659.0  1948.0      26.0     False   
2         Fenner    102   ACT   103863.0  1403.0  1121.0      29.0     False   
0           Bean    318   ACT   111176.0  1479.0  1181.0      36.0     False   
51       Solomon    307    NT    72938.0   285.0   274.0      14.0     False   
..           ...    ...   ...        ...     ...     ...       ...       ...   
88         Makin    187    SA   123221.0     0.0     0.0       0.0     False   
87      Kingston    186    SA   125968.0     0.0     0.0       0.0     False   
86     Hindmarsh    185    SA   129725.0     0.0     0.0       0.0     False   
85          Grey    183    SA   126501.0     0.0     0.0       0.0     False   
150    

In [None]:
# vs. 2PP 2020 (by seat)

In [None]:
# vs. tertiary education rates
 

In [11]:
# Show every seat ID as a plain Python list, in the frame's current row order
print(df.loc[:, 'SeatID'].to_list())

['125', '117', '102', '318', '307', '136', '208', '204', '197', '111', '157', '235', '328', '172', '242', '148', '216', '239', '160', '149', '245', '236', '222', '171', '317', '164', '193', '305', '240', '200', '201', '133', '311', '229', '166', '205', '302', '226', '101', '209', '168', '109', '217', '118', '124', '151', '120', '146', '139', '128', '132', '306', '218', '121', '167', '119', '122', '152', '135', '173', '127', '159', '115', '224', '322', '126', '223', '221', '220', '219', '130', '131', '215', '326', '309', '214', '213', '212', '321', '211', '210', '225', '228', '134', '114', '247', '103', '244', '243', '104', '105', '106', '107', '312', '238', '237', '108', '112', '113', '234', '233', '232', '324', '323', '315', '137', '161', '179', '178', '177', '176', '175', '174', '251', '153', '150', '170', '304', '169', '310', '155', '156', '165', '158', '163', '162', '252', '316', '180', '320', '182', '138', '140', '203', '249', '144', '198', '145', '196', '195', '319', '250', '192'