In [86]:
import os
import io
import sys
import pandas as pd
import numpy as np
import requests
import requests_cache
import pandas as pd

# Download locations on results.aec.gov.au for the two CSV files this notebook fetches.
# Both paths carry the same event id (20499).
# "House members elected" download. It is fetched and parsed further down, but the
# resulting frame is not used in the cells visible here.
ELECTION_2016_CANDIDATES_URI = "https://results.aec.gov.au/20499/Website/Downloads/HouseMembersElectedDownload-20499.csv"
# "House TCP by candidate by vote type" download; supplies the per-candidate TotalVotes
# and Elected columns that the margin calculation relies on.
ELECTION_2016_RESULTSTPP_URI = "https://results.aec.gov.au/20499/Website/Downloads/HouseTcpByCandidateByVoteTypeDownload-20499.csv"

In [103]:
# Cache HTTP responses on disk so re-running the notebook does not re-download the files.
requests_cache.install_cache(".cache")


def _fetch_aec_csv(uri, timeout=30):
    """Download a results file and return its text with the first line removed.

    The first line of each download is discarded before parsing, so the line
    that follows it is what pandas reads as the column header.

    Args:
        uri: Address of the CSV download.
        timeout: Seconds to wait on the server before giving up.

    Returns:
        Everything after the first line break of the response body.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    response = requests.get(uri, timeout=timeout)
    # Fail loudly instead of handing an error page to the CSV parser.
    response.raise_for_status()
    # Partitioning on "\n" copes with both CRLF and LF line endings; the previous
    # split("\r\n", 1)[1] raised IndexError when the body used bare LF.
    return response.text.partition("\n")[2]


req_election2016_candidates = _fetch_aec_csv(ELECTION_2016_CANDIDATES_URI)
req_election2016_results = _fetch_aec_csv(ELECTION_2016_RESULTSTPP_URI)

election2016_candidates = pd.read_csv(io.StringIO(req_election2016_candidates))
election2016 = pd.read_csv(io.StringIO(req_election2016_results))

# Pair each division's vote total with the row of its elected candidate. Both sides
# carry a "TotalVotes" column, so after the merge "TotalVotes_x" is the division total
# and "TotalVotes_y" is the elected candidate's own count.
division_votes = election2016.groupby(["DivisionID"])["TotalVotes"].sum()
elected_rows = election2016.loc[election2016["Elected"] == "Y"]
e = pd.merge(division_votes, elected_rows, on="DivisionID")

# Elected candidate's votes minus everyone else's votes in the division.
e["margin_votes"] = e["TotalVotes_y"] - (e["TotalVotes_x"] - e["TotalVotes_y"])
# The same lead expressed as a percentage of all votes in the division.
e["margin"] = (e["margin_votes"] / e["TotalVotes_x"] * 100).round(2)
e["candidate"] = e["GivenNm"] + " " + e["Surname"]
e["electorate"] = e["DivisionNm"]
e["state"] = e["StateAb"]
e["party"] = e["PartyAb"]

# From here on `election2016` is the one-row-per-electorate summary, not the raw download.
election2016 = e[[
    "electorate",
    "candidate",
    "state",
    "party",
    "margin_votes",
    "margin",
    "Swing"
]]

grants = pd.read_csv("data/grants_geocoded.csv")

# Attach the seat summary to every grant row.
grants = pd.merge(grants, election2016, how="left", on="electorate")

# Per-electorate number of grants (non-null "name" values) and summed "amount".
grants_by_electorate = grants.groupby(["electorate"])
grants_seat = pd.merge(election2016, grants_by_electorate["name"].count(), how="left", on="electorate")
grants_seat = pd.merge(grants_seat, grants_by_electorate["amount"].sum(), how="left", on="electorate")

# Electorates without any grant come out of the left merges as NaN. Only the two grant
# columns are zero-filled, so a missing election field is never silently rewritten as 0.
grants_seat["amount"] = grants_seat["amount"].fillna(0).astype("int64")
grants_seat["grants"] = grants_seat["name"].fillna(0).astype("int64")
grants_seat = grants_seat.drop(columns=["name"])

# Optional export of the per-seat table:
# grants_seat.to_json("src/data/grants_seat.json", orient="table", index=False)


Unnamed: 0,electorate,candidate,state,party,margin_votes,margin,Swing,amount,grants
0,Canberra,Gai BRODTMANN,ACT,ALP,21774,16.92,0.95,584464,7
1,Fenner,Andrew Keith LEIGH,ACT,ALP,34462,27.79,1.40,741981,4
2,Banks,David COLEMAN,NSW,LP,2588,2.88,-1.36,860000,2
3,Barton,Linda BURNEY,NSW,ALP,14560,16.60,3.91,5245,1
4,Bennelong,John ALEXANDER,NSW,LP,17923,19.43,1.95,50000,1
...,...,...,...,...,...,...,...,...,...
145,Flynn,Ken O'DOWD,QLD,LNP,1814,2.08,-5.49,927924,5
146,Durack,Melissa PRICE,WA,LP,16957,22.11,-3.98,816642,14
147,McMahon,Chris BOWEN,NSW,ALP,20846,24.23,7.48,223575,3
148,Wright,Scott BUCHHOLZ,QLD,LNP,16984,19.23,-2.22,866533,6


In [180]:
import plotly.express as px
import plotly.graph_objects as go
from sklearn.linear_model import LinearRegression

# Collapse the LP, NP and LNP abbreviations into a single "LP" group; every other party
# keeps its own abbreviation. The scatter plot in the following cell colours by this column.
in_lp_group = grants_seat["party"].isin(["LP", "NP", "LNP"])
grants_seat["party_g"] = grants_seat["party"].mask(in_lp_group, "LP")

# Split the seats once instead of re-evaluating the same filter for every use.
lp_seats = grants_seat.loc[grants_seat["party_g"] == "LP"]
# NOTE: despite the "alp_" prefix on the names below, this is every seat outside the
# LP group, not only seats whose party is "ALP".
other_seats = grants_seat.loc[grants_seat["party_g"] != "LP"]

# One straight-line fit per group: grant amount as a function of margin.
lnp_x = lp_seats[["margin"]]
lnp_y = lp_seats[["amount"]]
lnp_reg = LinearRegression()
lnp_reg.fit(lnp_x, lnp_y["amount"])
lnp_y_pred = lnp_reg.predict(lnp_x)

alp_x = other_seats[["margin"]]
alp_y = other_seats[["amount"]]
alp_reg = LinearRegression()
alp_reg.fit(alp_x, alp_y["amount"])
alp_y_pred = alp_reg.predict(alp_x)

fig = go.Figure()

# Raw observations, one marker per seat.
for seats, colour, label in (
    (lp_seats, "blue", "LP/NP/LNP seats"),
    (other_seats, "red", "Other seats"),
):
    fig.add_trace(
        go.Scatter(
            x=seats["margin"],
            y=seats["amount"],
            mode="markers",
            marker_color=colour,
            name=label,
        )
    )

# Fitted lines. A lines-only trace takes its colour from `line_color`; the previous
# `marker_color` had no visible effect, so the lines were drawn in default colours.
for seats, fitted, colour, label in (
    (lp_seats, lnp_y_pred, "darkblue", "LP/NP/LNP fit"),
    (other_seats, alp_y_pred, "darkred", "Other fit"),
):
    fig.add_trace(
        go.Scatter(
            x=seats["margin"],
            y=fitted,
            mode="lines",
            line_color=colour,
            name=label,
        )
    )

fig.update_layout(
    title="Grant amount against winning margin, by seat",
    xaxis_title="Margin (% of total votes)",
    yaxis_title="Total grant amount",
)
fig.show()


In [181]:

# Bubble chart of every seat: position is margin against grant amount, colour is the
# grouped party column, and bubble area follows the number of grants.
fig = px.scatter(
    data_frame=grants_seat,
    x="margin",
    y="amount",
    color="party_g",
    size="grants",
    hover_data=["electorate"],
)
fig.show()