In [None]:
%%script false --no-raise-error
<h1 class="header">About the Project</h1>
<div class="text-box bulleted">
    <h2 class="header">Overview</h2>
    <p class="text">The project involves the statistical analysis of the official results of the Intra-School Council Elections using libraries like Pandas, Matplotlib & Seaborn along with SQL.</p>
    <p class="text">The student council elections are held every year to choose representatives for different leadership roles in the school, managed entirely by student-led tech teams.</p>
    <h2 class="header">Election Software</h2>
    <p class="text">The (1)<a href="http://github.com/d1vij/electionsoftware" target="_blank" rel="noopener noreferrer"><em>Election Software</em></a> through which the elections were held is a fullstack application that I designed and developed, featuring a frontend built with HTML, CSS, and TypeScript, and a REST api by FastAPI (Python). Votes were securely stored in a MongoDB database.</p>
    <h2 class="header">Report Generation</h2>
    <p class="text">This report is in fact a single Jupyter notebook exported via a custom script to HTML and styled via CSS, (and then printed). Source of which is available at (2)<a href="http://github.com/d1vij/ip-proj" target="_blank" rel="noopener noreferrer"><em>GitHub Repo</em></a>.</p>
</div>

<div class="text-box">
    <h2 class="header">References</h2>
    <p class="text">(1) https://github.com/d1vij/electionsoftware</p>
    <p class="text">(2) https://github.com/d1vij/ip-proj</p>
</div>

<div class="text-box">
    <h2 class="header">Libraries used</h2>
    <ol><li><em>Pymongo</em> -> for querying vote documents from MongoDB server</li><li><em>sqlite3</em> -> for querying local sql database</li><li><em>pandas</em> -> for data manipulation and analysis</li><li><em>seaborn</em> -> Graphing</li><li><em>matplotlib</em> -> Graphing</li></ol>
</div>

### Importing Stuff

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
%%script false --no-raise-error
<h3 class="header">Defining constants</h3>

In [None]:
# Separator characters used by replace_spaces and when turning identifiers into plot labels.
_SPACE = " "
_UNDERSCORE = "_"
# NOTE(review): this connection string embeds a username and password in plain text.
# Anyone with access to the notebook (or its exported HTML) can read them; consider
# rotating the password and loading the URL from the environment instead.
CONNECTION_URL = "mongodb+srv://vermadivij:elections@cluster1.kicphp2.mongodb.net/?retryWrites=true&w=majority&appName=cluster1"
# Database name and collection names (one per class section, plus "absentees" and
# "candidates") intended for the MongoDB download helpers.
DATABASE_NAME = "votes"
CLASSES = '10A 10B 10C 10D 10E 10F 10G 10H 10I 10J 11A 11B 11C 11D 11E 12A 12B 12C 12D 9A 9B 9C 9D 9E 9F 9G 9H 9I 9J absentees candidates'.split(_SPACE)

# Apply seaborn's default theme to every figure drawn after this point.
sns.set_theme()
def replace_spaces(string: str, replace_with=_UNDERSCORE):
    """Return ``string`` with every space character swapped for ``replace_with``.

    ``replace_with`` defaults to an underscore.
    """
    # Splitting on the single-space separator and re-joining is equivalent to
    # str.replace for a non-empty separator (consecutive spaces are preserved
    # as consecutive replacements).
    pieces = string.split(_SPACE)
    return replace_with.join(pieces)

In [None]:
%%script false --no-raise-error
<h1 class="header">Structure of Data</h1>
<div class="text-box bulleted">
    <p class="text">The votes were stored individually on a <em>MongoDB</em> server in the following structure:</p>
    <pre class="language-js">
        <code class="language-js">
<span class="comment">//example single vote document</span>
{
    "_id": {
        "$oid": "68a1819ceff178ec25b66fbb" <span class="comment">           // internal mongodb document id</span>
    },
    "token": "b489737f-7997-430c-950f-b8c1b22f68c3", <span class="comment"> // A unique uuid4 based token identifying the vote session</span>
    "client": "29", <span class="comment">                                  // Computer on which the vote was done</span>
    "vote_data": [ <span class="comment">                                   // candidates voted by the voter</span>
        {
            "name": "Abhichandra Charke", 
            "post": "Captain Boy"
        },
        {
            "name": "Gauravi Zade",
            "post": "Captain Girl"
        },
        {
            "name": "Kausar Chandra",
            "post": "Vice Captain Boy"
        },
        {
            "name": "Ketaki Phalle",
            "post": "Vice Captain Girl"
        }
    ]
}
</code></pre>
</div>
<div class="text-box bulleted">
    <p class="text"><em class="underlined">get_classwise_documents</em> -> The function returns array of <em>T_Class_Documents</em> objects</p>
    <p class="text">In which each object contains properties<ol><li><em class="underlined">name</em> -> The name of class </li><li><em class="underlined">votes</em> -> Array of <em>T_Vote</em></li></ol></p>
    <p class="text">T_Vote contains two properties<ol><li><em class="underlined">name</em> -> Name of candidate voted</li><li><em class="underlined">post</em>-> Post the candidate is voted for</li></ol></p>
</div>

In [None]:
def get_classwise_documents(
    connection_url: str, database_name: str, classes: list[str]
) -> list[dict]:
    """Fetch the stored votes of every requested class collection from MongoDB.

    Args:
        connection_url: MongoDB connection string.
        database_name: Name of the database to read from.
        classes: Collection names to read; the result keeps this order.

    Returns:
        One ``{"name": <collection name>, "votes": [...]}`` dict per entry of
        ``classes``, where ``votes`` lists the ``vote_data`` field of every
        document found in that collection.

    Raises:
        KeyError: If a document has no ``vote_data`` field.
    """
    # Kept function-local as in the original; presumably so the notebook only
    # needs pymongo when a fresh download is actually requested.
    import pymongo

    conn = pymongo.MongoClient(connection_url)
    try:
        database = conn.get_database(database_name)
        all_documents: list[dict] = []
        for class_name in classes:
            collection = database.get_collection(class_name)
            class_documents: list[dict] = [
                vote_document["vote_data"]  # type: ignore
                for vote_document in collection.find({})
            ]
            all_documents.append({"name": class_name, "votes": class_documents})
        return all_documents
    finally:
        # Always release the client's sockets, even if a query fails midway.
        conn.close()


def download_results():
    """Download every class's votes from MongoDB and cache them in ``votes.json``.

    Uses the notebook-level ``CONNECTION_URL``, ``DATABASE_NAME`` and
    ``CLASSES`` constants rather than re-declaring them here, so the
    connection details live in exactly one place. The file is written to the
    current working directory and overwrites any existing ``votes.json``.
    """
    import json

    documents = get_classwise_documents(CONNECTION_URL, DATABASE_NAME, CLASSES)
    with open("votes.json", "w") as file:
        json.dump(documents, file)

### Running Compilation Functions

In [None]:
%%script false --no-raise-error
<h1 class="header">Compiling classwise data into dataframes</h1><div class="text-box bulleted">
    <p class="text"><em class="underlined">calculate_total_votes_of_class</em> → Function to compute total votes
        obtained by each candidate in a single class.</p>
    </div>
    <div class="text-box">
        <p class="text">Working:</p>
    <ol><li><p class="text">Initializes <em>compiled_votes</em>, a vote count for each candidate under every post.</p></li><li><p class="text">Iterates through <em>votes</em> in a class document.</p></li><li><p class="text">Increments the candidate's count under the respective post.</p></li><li><p class="text">Returns the class name and the compiled vote data.</p></li></ol></div>
    <div class="text-box bulleted">
    <p class="text"><em class="underlined">create_dataframes</em> → Function to transform compiled vote data into Pandas DataFrames for analysis.</p>
</div>
    <div class="text-box">
    <p class="text">Steps:</p><ol><li><p class="text">Creates an empty DataFrame for each post, with <em>CLASSES</em> as rows and candidates as columns.</p></li><li><p class="text">Fills each DataFrame with vote counts from <em>compiled_votes</em>.</p></li><li><p class="text">Returns a dictionary of DataFrames, one per post, for further statistical analysis and visualization.</p></li></ol>
</div>

In [None]:
# Candidates standing for each post, keyed by the post title exactly as it
# appears in the "post" field of the stored votes.
# fmt: off
candidate_data = {
    "Captain Boy": [ "Aadityaraje Desai", "Abhichandra Charke", "Praneel Deshmukh", "Rachit Srivastava", ],
    "Captain Girl": [ "Tvisha Shah", "Gauravi Zade", "Kirthika Jayachander", "Naisha Rastogi", ],
    "Vice Captain Boy": [ "Kausar Chandra", "Sagnik Ghosh", "Avaneesh Mahalle", "Krishna Yadav", "Viren Jadhav", ],
    "Vice Captain Girl": [ "Ketaki Phalle", "Trisha Kandpal", "Riya Shirode", "Kavya Mehta", "Sumedha Vaidya", ],
}
# fmt: on
import json

import pandas as pd


def calculate_votes(votes_json: str):
    """Flatten the cached class-wise votes into one row per individual vote.

    Args:
        votes_json: Path of a JSON file holding a list of
            ``{"name": ..., "votes": [[{"name": ..., "post": ...}, ...], ...]}``
            entries (one entry per class, one inner list per voting session).

    Returns:
        A DataFrame with the columns ``class``, ``candidate_name`` and
        ``post``, in file order.
    """
    with open(votes_json) as handle:
        classwise_votes: list[dict] = json.load(handle)

    records = []
    for class_entry in classwise_votes:
        for ballot in class_entry["votes"]:
            for choice in ballot:
                records.append(
                    {
                        "class": class_entry["name"],
                        "candidate_name": choice["name"],
                        "post": choice["post"],
                    }
                )

    # Passing the column list keeps the schema stable even when there are no votes.
    column_order = ["class", "candidate_name", "post"]
    return pd.DataFrame(records, columns=column_order)  # type: ignore


# Load the cached votes into one long-form frame (one row per individual vote).
# NOTE(review): this needs votes.json to exist already; download_results() writes
# that file but is not called in any visible cell.
votes_df = calculate_votes("votes.json")

In [None]:
# Per-post vote tables: each keeps the "class" and "candidate_name" columns of
# the rows in votes_df that were cast for that post.
classwise_grouped = votes_df.groupby("post")


def _votes_for_post(post_title: str) -> pd.DataFrame:
    """Return the rows of ``votes_df`` cast for ``post_title``, minus the post column."""
    row_labels = classwise_grouped.groups[post_title]
    return votes_df.loc[row_labels].drop("post", axis=1)


cb = _votes_for_post("Captain Boy")
cg = _votes_for_post("Captain Girl")
vcb = _votes_for_post("Vice Captain Boy")
vcg = _votes_for_post("Vice Captain Girl")

# Keys double as SQLite table names, hence the snake_case spelling.
postwise_votes_df = dict(
    captain_boy=cb,
    captain_girl=cg,
    vice_captain_boy=vcb,
    vice_captain_girl=vcg,
)

In [None]:
%%script false --no-raise-error
<div class="text-box">
  <p class="text">
    The <em>query()</em> function is a wrapper around SQLite queries, supporting both 
    data retrieval and data modification. 
  </p>
</div>

In [28]:
import sqlite3
from sqlite3 import Connection, Cursor, OperationalError
from typing import Any, Literal


class SqliteDatabase:
    def __init__(self, database: str):
        self.database = database
        self.conn: Connection | None = None
        self.cursor: Cursor | None = None

    def __enter__(self):
        try:
            self.conn = sqlite3.connect(self.database)
            self.cursor = self.conn.cursor()
        except Exception as e:
            print(f"Error occured in connecting to the database {self.database}. Error Details: {e}")

        return self.query

    def __exit__(self, exc_type, exc, tb):
        assert self.conn is not None
        assert self.cursor is not None

        self.cursor.close()
        self.conn.close()

        return False  # dont suppress the error

    def query(
        self,
        query: str,
        *,
        is_updation=False,  # is the current query contains some kind of updation ?? Doesnt return anything if true
        return_rows: None | Literal["str"] | Literal["tuple"] = None,
        table_heading: str | None = None,  # Title printed before printing output
    ) -> None | tuple[tuple[str, ...], ...]:
        assert self.conn is not None
        assert self.cursor is not None

        try:
            results = self.cursor.execute(query)
            self.conn.commit()
        except Exception as err:
            print("** Row / Column names with spaces should be enlcosed within quotes **")
            raise err

        if is_updation:
            return

        rows: list[Any] = results.fetchall()
        columns_headers: tuple[str, ...] = tuple(str(col[0]) for col in results.description)

        lines: tuple[tuple[str, ...], ...] = tuple((columns_headers, *rows))
        
        if return_rows == "tuple":
            return lines
        elif return_rows is None:
            # printing table header if provided
            if table_heading is not None:
                print(table_heading)

            # Finding max column width
            column_widths: list[int] = []

            for col_idx in range(len(lines[0])):
                widths = []
                for row_idx in range(len(lines)):
                    widths.append(len(str(lines[row_idx][col_idx])))
                column_widths.append(max(widths))

            # Printing column headers
            border_top_bottom = "+" + "-" * (sum(column_widths) + 3 * len(column_widths) - 1)  + "+"
            print(border_top_bottom)
            print("| ", end="")
            for idx, col_label in enumerate(lines[0]):
                print(str(col_label).ljust(column_widths[idx]), end=" | ")
            print()
            print(border_top_bottom)

            print(rows)
            for row in rows[1:]:
                print("| ", end="")
                for idx, col_val in enumerate(row):
                    print(str(col_val).ljust(column_widths[idx]), end=" | ")
                print()

            print(border_top_bottom)
            return None

In [None]:
%%script false --no-raise-error
<h1 class="header">Creating post dataframes and saving them to SQLite database</h1>

In [29]:
# Mirror every per-post vote table into votes.db and preview the first rows.
with SqliteDatabase("votes.db") as query:
    for post_name, post_df in postwise_votes_df.items():
        # Recreate the table on every run so re-executing the cell never duplicates votes.
        query(f"drop table if exists {post_name};", is_updation=True)
        query(
            f"create table {post_name} (class varchar(255), candidate_name varchar(255));",
            is_updation=True,
        )

        # query() only accepts a finished SQL string, so the values are embedded
        # as literals. Single quotes are doubled (the SQL escape) so a name such
        # as "D'Souza" cannot terminate the literal early.
        rows = [
            "('{}', '{}')".format(
                str(class_name).replace("'", "''"),
                str(candidate_name).replace("'", "''"),
            )
            for class_name, candidate_name in zip(post_df["class"], post_df["candidate_name"])
        ]

        # "insert ... values" with no tuples is a syntax error; skip empty posts.
        if rows:
            query(f"insert into {post_name} values " + ",".join(rows), is_updation=True)
        query(f"select * from {post_name} limit 10", table_heading=post_name)

captain_boy
+----------------------------+
| class | candidate_name     | 
+----------------------------+
1
| 10A   | Abhichandra Charke | 
2
| 10A   | Aadityaraje Desai  | 
3
| 10A   | Aadityaraje Desai  | 
4
| 10A   | Abhichandra Charke | 
5
| 10A   | Abhichandra Charke | 
6
| 10A   | Praneel Deshmukh   | 
7
| 10A   | Aadityaraje Desai  | 
8
| 10A   | Praneel Deshmukh   | 
9
| 10A   | Aadityaraje Desai  | 
+----------------------------+
captain_girl
+------------------------------+
| class | candidate_name       | 
+------------------------------+
1
| 10A   | Tvisha Shah          | 
2
| 10A   | Tvisha Shah          | 
3
| 10A   | Gauravi Zade         | 
4
| 10A   | Gauravi Zade         | 
5
| 10A   | Gauravi Zade         | 
6
| 10A   | Gauravi Zade         | 
7
| 10A   | Kirthika Jayachander | 
8
| 10A   | Gauravi Zade         | 
9
| 10A   | Naisha Rastogi       | 
+------------------------------+
vice_captain_boy
+--------------------------+
| class | candidate_name   | 
+----------

In [None]:
%%script false --no-raise-error

<h1 class="header">Statistical analysis</h1>
<div class="text-box">
    
</div>

In [None]:
%%script false --no-raise-error
<h1 class="header">Total votes across all classes</h1>

In [None]:
%%script false --no-raise-error
<h1 class="header">Total votes received by any Candidate</h1>
<div class="text-box">
    <p class="text">Working:</p><ol><li><p class="text">Defines <em>positions</em> for mapping each post to a subplot.</p></li><li><p class="text">Iterating over post dataframes in <em>result_dataframes</em>.</p></li><li><p class="text">Running SQL query to sum votes of in the table of that post via <em>query()</em>.</p></li><li><p class="text">Plotting a Seaborn bar chart of the total votes (<em>post_df.sum()</em>) in its respective subplot.</p></li></ol>
</div>

In [31]:
# List every distinct candidate name stored in the captain_boy table.
distinct_candidates_sql = "select distinct candidate_name from captain_boy"
with SqliteDatabase("votes.db") as run_query:
    run_query(distinct_candidates_sql)

+--------------------+
| candidate_name     | 
+--------------------+
1
| Abhichandra Charke | 
2
| Praneel Deshmukh   | 
3
| Rachit Srivastava  | 
+--------------------+


In [None]:
# One bar chart per post on a 2x2 grid, each preceded by the matching SQL tally.
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
positions = [(0, 0), (0, 1), (1, 0), (1, 1)]

for grid_pos, (post_name, post_df) in zip(positions, postwise_votes_df.items()):
    heading = "Total Votes for - " + post_name
    with SqliteDatabase("votes.db") as query:
        query(
            f"""
            select candidate_name as Name, count(*) as Votes
            from {post_name}
            group by candidate_name
            order by Votes desc;
            """,
            table_heading=heading,
        )
    print()

    # Same tally computed in pandas; this is what gets plotted.
    target_ax = axes[grid_pos]
    votes = post_df.groupby("candidate_name").size()
    sns.barplot(votes, ax=target_ax)  # type: ignore
    target_ax.set_title(post_name)

plt.tight_layout()
plt.show()
print()

In [None]:
%%script false --no-raise-error
<h1 class="header">Candidate popularity trends</h1>
<div class="text-box">
    <p class="text">Comparing candidate performances across classes</p>
    <p class="text">Following steps taken for each post's dataframe</p>
    <div class="text-box bulleted">
        <p class="text">Iterating over post dataframes and extracting all the rows belonging to a particular 'standard' from the post's dataframe by using <em>Regular Expressions</em></p>
        <p class="text">Dividing plot into 4 subplots for each class <em>(9, 10, 11, 12)</em></p>
        <p class="text">Plotting the section-wise votes received by a candidate</p>
    </div>
</div>


In [None]:
# candidate popularity trends - comparing candidate performances across classes

from matplotlib.ticker import MultipleLocator


def plot_popularity_trends(post_name: str, post_df: pd.DataFrame):
    """Plot, for each standard (9-12), the section-wise votes every candidate received.

    Args:
        post_name: Post title; used as the figure title.
        post_df: Either the long-form vote table of one post (one row per
            vote, with ``class`` and ``candidate_name`` columns) or an already
            aggregated table with class sections as the index and one column
            of vote counts per candidate.

    The figure is shown with ``plt.show()``; nothing is returned.
    """
    # The per-post frames built earlier in the notebook are long-form, so pivot
    # them into a section x candidate count table first. Their integer index has
    # no ``.str`` accessor, which is why filtering on ``post_df.index`` directly
    # fails for them.
    if {"class", "candidate_name"}.issubset(post_df.columns):
        vote_counts = pd.crosstab(post_df["class"], post_df["candidate_name"])
    else:
        vote_counts = post_df

    # extracting rows belonging to a particular standard using regular expressions;
    # match() anchors at the start so "9" can never hit the tail of another label
    standards = ["9", "10", "11", "12"]
    section_labels = vote_counts.index.astype(str)
    class_wise_dataframes = [
        vote_counts[section_labels.str.match(rf"{standard}\w")]
        for standard in standards
    ]

    # dividing the plot into 4 subplots, one per standard
    fig, axes = plt.subplots(2, 2, figsize=(15, 7))
    linestyles = [":", "-", "--", "-.", "solid"]

    for ax, standard, class_df in zip(axes.flat, standards, class_wise_dataframes):
        sections = class_df.index

        for line_idx, (candidate_name, candidate_series) in enumerate(class_df.items()):
            ax.plot(
                sections,
                candidate_series,
                label=str(candidate_name).replace(_UNDERSCORE, _SPACE),
                # cycle so that more candidates than styles cannot raise IndexError
                linestyle=linestyles[line_idx % len(linestyles)],
            )

        ax.set_title(f"Class {standard}")
        ax.set_xlabel("class")
        ax.set_ylabel("Votes")

        # values on y-axis would have a difference of 2
        ax.yaxis.set_major_locator(MultipleLocator(2))

    fig.suptitle(post_name, fontsize=32)

    # setting a common legend for the whole plot; every subplot has the same
    # candidate columns, so the first one's handles cover all of them
    handles, labels = axes[0, 0].get_legend_handles_labels()
    fig.legend(handles, labels, loc="upper right", ncols=2, fontsize=15)

    plt.show()


# Draw one popularity-trend figure per post, in the original order.
for _post_title, _post_votes in (
    ("Captain Boy", cb),
    ("Captain Girl", cg),
    ("Vice Captain Boy", vcb),
    ("Vice Captain Girl", vcg),
):
    plot_popularity_trends(_post_title, _post_votes)

In [None]:
%%script false --no-raise-error
<h1 class="header">Plotting the share in percent of classes in which a candidate has majority</h1>
<div class="text-box">
    <p class="text">Working:</p>
    <ol><li><p class="text">Dividing the plot into four subplots</p></li><li><p class="text">Extracting the count of classes in which a particular candidate has the maximum votes amongst all other candidates of same post</p></li><li><p class="text">Dividing the series obtained in the previous step by the total number of classes to get the percent share series</p></li><li><p class="text">Plotting the percent share series</p></li></ol>
</div>

In [None]:
# One pie per post: the share of classes in which each candidate polled the most votes.
fig, axes = plt.subplots(2, 2, figsize=(10, 7), constrained_layout=True)
fig.suptitle("Share (percent) of Classes in which a Candidate has a majority    ", fontsize=20)

colors = plt.cm.copper_r(np.linspace(0, 0.50, 5))  # type: ignore

# Iterate the per-post frames defined earlier in the notebook; they are
# long-form (one row per vote), so each is pivoted into a class x candidate
# count table before looking for the per-class winner.
for ax, (post_name, post_df) in zip(axes.flat, postwise_votes_df.items()):
    class_candidate_counts = pd.crosstab(post_df["class"], post_df["candidate_name"])

    # Number of distinct classes that voted for this post (not the number of votes).
    total_classes = len(class_candidate_counts.index)

    # Row-wise idxmax gives the leading candidate of each class (first one on ties).
    classes_won_by_candidate_series = class_candidate_counts.idxmax(axis=1)
    count_series = classes_won_by_candidate_series.value_counts()  # sorted descending
    percents_series = count_series / total_classes

    ax.set_title(post_name.replace(_UNDERSCORE, _SPACE).title())
    ax.pie(
        percents_series,
        labels=percents_series.index.map(
            lambda name: name.replace(_UNDERSCORE, _SPACE)
        ),
        autopct="%1.1f%%",
        startangle=180,
        colors=colors,
    )
    ax.set(aspect="equal")


plt.show()

In [None]:
%%script false --no-raise-error
<div class="text-box bulleted">
    <p class="text">Furthermore, we can notice that despite being the second leading candidate, Aadityaraje Desai has almost double the class-wise majority share of the leading candidate (Praneel Deshmukh) for the post of School Captain.</p>
    <p class="text">The absence of fifth candidate (Riya Shirode) in the fourth pie shows that they are not the majority in any class amongst all other candidates of the same post</p>
</div>

In [None]:
%%script false --no-raise-error
<h1 class="header">Candidate Co-Voting Patterns</h1>

<div class="text-box bulleted">
    <p class="text">Analyzes whether voters who supported one candidate also tended to support another.</p>
    <p class="text">Working:</p>
</div>
<div class="text-box">
    <ol>
        <li><p class="text">Builds a <em>vote matrix</em> recording which candidates were chosen in each voting session.</p><pre class="language-py"><code class="language-py"><p class="text comment">// Example</p>
        Captain Boy     Captain Girl  Vice Captain Boy Vice Captain Girl class
25    Praneel Deshmukh  Gauravi Zade      Viren Jadhav       Kavya Mehta   10C
26   Aadityaraje Desai  Gauravi Zade  Avaneesh Mahalle       Kavya Mehta   10C
27  Abhichandra Charke  Gauravi Zade  Avaneesh Mahalle     Ketaki Phalle   10C
28    Praneel Deshmukh  Gauravi Zade      Sagnik Ghosh    Sumedha Vaidya   10C
29  Abhichandra Charke  Gauravi Zade      Sagnik Ghosh    Sumedha Vaidya   10C
30  Abhichandra Charke  Gauravi Zade  Avaneesh Mahalle     Ketaki Phalle   10C</code></pre></li>
<li><p class="text">Constructs a <em>co-occurrence matrix</em> showing how often Candidate B was voted when Candidate A was voted.<pre class="language-py"><code class="language-py"><p class="comment">// Example</p>
                    Gauravi Zade  Kirthika Jayachander  Naisha Rastogi
Avaneesh Mahalle            94                     4              26   
Krishna Yadav               54                     3              14   
Viren Jadhav                33                     2              11   
Ketaki Phalle              116                     5              38   
Trisha Kandpal              52                     5              15   
Riya Shirode                 5                     3               0   
Kavya Mehta                 52                     3              18   
Sumedha Vaidya              27                     4               1   
</code></pre></p></li><li><p class="text">Normalizes it into a <em>probability matrix</em> to estimate the likelihood of co-support between candidates. Each row of the co-occurrence matrix is divided by the total votes in that row. This converts raw counts into conditional probabilities, i.e., the chance of Candidate B being voted given that Candidate A was voted.</p></li><li><p class="text">Visualizes both matrices using heatmaps — one for raw counts, the other for probabilities.</p></li></ol>
</div>

In [None]:
# Scratch check: the column labels (one per post, plus "class") used for the
# vote matrix built in the next cell.
[*candidate_data.keys(), "class"]

In [None]:
# constructing votes matrix
# vote matrix contains which candidate was voted for which post in any particular voting session

# Reload the cached class-wise documents here: no earlier visible cell keeps them
# in a variable (calculate_votes only returns the flattened frame).
with open("votes.json") as file:
    class_wise_documents = json.load(file)

# Collect one record per voting session and build the frame once; growing a
# DataFrame row by row with .loc re-allocates it on every insert.
vote_records = []
for _class in class_wise_documents:
    for session_votes in _class["votes"]:
        vote_dict = {"class": _class["name"]}
        for vote in session_votes:
            vote_dict[vote["post"]] = vote["name"]  # type:ignore
        vote_records.append(vote_dict)

# Passing the columns fixes their order and leaves NaN where a session skipped a post.
vote_matrix = pd.DataFrame(vote_records, columns=[*candidate_data.keys(), "class"])

# Flat list of every candidate across all posts, in candidate_data order.
all_candidates = [
    candidate for candidates in candidate_data.values() for candidate in candidates
]
print(vote_matrix.iloc[25:31])

In [None]:
# Keep only the four post columns; the "class" column plays no part in co-voting.
post_columns = ["Captain Boy", "Captain Girl", "Vice Captain Boy", "Vice Captain Girl"]
vote_only_matrix = vote_matrix[post_columns]

# The co-occurance matrix counts how many sessions picked candidate A (row)
# together with candidate B (column). It is N x N, N being the number of
# candidates across all posts.
co_occurance_matrix = pd.DataFrame(0, index=all_candidates, columns=all_candidates)

# Every ordered pair of different names chosen in one session adds one count.
for _, session in vote_only_matrix.iterrows():
    chosen_names = session.values
    for name_A in chosen_names:
        for name_B in chosen_names:
            if name_A == name_B:
                continue
            co_occurance_matrix.loc[name_A, name_B] += 1

# Row-normalise: every row is divided by its own total, so row A holds the share
# of A's co-votes that went to each candidate B (y axis = A, x axis = B).
# NOTE(review): the row total adds up co-votes across all other posts, so a row
# sums to 100% over every post combined; dividing by candidate A's own vote count
# would instead give P(B voted | A voted) per post. Confirm which one is intended.
row_totals = co_occurance_matrix.sum(axis=1)
probability_matrix = co_occurance_matrix.div(row_totals, axis=0)


def _annotated_heatmap(matrix, title, number_format, colorbar_label):
    """Draw ``matrix`` as an annotated viridis heatmap, show it, and return ``(fig, ax)``."""
    fig, ax = plt.subplots(figsize=(12, 8))
    ax.set_title(title, size=16)
    sns.heatmap(
        matrix,
        cmap="viridis",
        vmin=0,
        annot=True,
        ax=ax,
        fmt=number_format,
        cbar_kws={"label": colorbar_label},
    )
    ax.set_ylabel("Person A", size=12)
    ax.set_xlabel("Person B", size=12)
    ax.set_xticklabels(ax.get_xticklabels(), rotation=90)
    ax.set_yticklabels(ax.get_yticklabels(), rotation=0)
    plt.show()
    return fig, ax


# ---- first plot: raw counts ----
fig1, ax1 = _annotated_heatmap(
    co_occurance_matrix,
    "Co-occurance plot - Number of times person A got voted when person B was voted",
    ".0f",
    "Count",
)

# ---- second plot: row-normalised shares in percent ----
fig2, ax2 = _annotated_heatmap(
    probability_matrix * 100,
    "Probability plot - Probabilty (in percent) of person B getting voted when person A was voted",
    ".1f",
    "Percent",
)

# the percents here for a column dont add up to 100 cuz they are mutually exclusive events


In [None]:
%%script false --no-raise-error
<h1 class="header">Strongest & Weakest Allies</h1>

<div class="text-box bulleted">
    <p class="text">Identifies which candidates tend to appear most often (or least often) alongside another candidate in votes.</p>
</div>

<div class="text-box">
    <p class="text">Working:</p>
    <ol><li><p class="text">For each candidate, extracts their row from the <em>probability matrix</em> (probability of other candidates being voted when this candidate is chosen).</p></li><li><p class="text">Removes same-post candidates to avoid trivial overlaps (since voters pick only one per post).</p></li><li><p class="text">Finds the <strong>Strongest Ally</strong> → candidate with the highest co-vote probability.</p></li><li><p class="text">Finds the <strong>Weakest Ally</strong> → candidate with the lowest co-vote probability.</p></li><li><p class="text">Combines results into a summary table, showing strongest and weakest allies for each candidate.</p></li></ol>
</div>

In [None]:
# Use row indexes for all comparisions

# Collect the results in plain dicts and build each Series once at the end;
# a dtype-less empty pd.Series grown one label at a time triggers dtype
# warnings on older pandas and re-allocates on every assignment.
strongest_allies = {}
weakest_allies = {}

for post_name, same_post_candidates in candidate_data.items():
    for name in same_post_candidates:
        # extracting the row which gives co-occurance probabilty for a candidate
        cps = probability_matrix.loc[name]

        # removing values of all the candidates in the same post
        candidate_probability_series = cps[~cps.index.isin(same_post_candidates)].dropna()

        # A candidate who never received a vote has an all-NaN row (0 / 0), for
        # which idxmax / idxmin are undefined; record "no ally" instead of failing.
        if candidate_probability_series.empty:
            strongest_allies[name] = pd.NA
            weakest_allies[name] = pd.NA
            continue

        strongest_allies[name] = candidate_probability_series.idxmax()
        weakest_allies[name] = candidate_probability_series.idxmin()

strongest_ally_series = pd.Series(strongest_allies, name="Strongest Ally", dtype="object")
weakest_ally_series = pd.Series(weakest_allies, name="Weakest Ally", dtype="object")

# concat based on similar rows
summary = pd.concat([strongest_ally_series, weakest_ally_series], axis=1)
print(summary.sort_values(by=list(summary.columns)))

# strongest ally is the candidate who is most likely to be voted when a candidate is voted
# weakest ally is the candidate who is least likely to be voted when a candidate is voted

In [None]:
# mean co-support (in percent): for every candidate (column of probability_matrix),
# the average of that column over all rows, i.e. how strongly the candidate is
# co-voted on average by supporters of the other candidates.
# DataFrame.mean() averages column-wise and skips NaN entries by default.
probability_matrix.mean() * 100

In [None]:
# exporting votes to csv
# Writes the four post columns (plus the row index) to votes.csv in the working directory.
vote_only_matrix.to_csv("votes.csv")

In [None]:
# Count how often each complete combination of four choices was cast;
# DataFrame.value_counts() returns the combinations sorted by frequency, most common first.
unqiue_votes = vote_only_matrix.value_counts()
print(f"Most popular choice groups\n\n{unqiue_votes.head(3)}")

In [None]:
# Same tally as above but on the full vote matrix, so combinations are counted
# per class (the "class" column is part of each counted row).
a = vote_matrix.value_counts()
a.head()

In [None]:
%%script false --no-raise-error
<!-- TODO: Force this onto new page -->
<h1 class="header">Raw csv votes</h1>