In [None]:
import os

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# CSV of students that the notebook loads. The path is resolved against the
# current user's home directory ("~") instead of one specific account, so the
# notebook is not tied to a single machine.
file = os.path.expanduser("~/Downloads/2025_autumn_CSE_583_A_students.csv")

In [None]:
# Read the student CSV into a DataFrame, then show its column labels as the
# cell output.
df = pd.read_csv(filepath_or_buffer=file)
df.columns

In [None]:
# Collapse the free-text "Major" entries onto one canonical label each: any
# entry that contains a key below is replaced wholesale by the mapped value.
majors = df["Major"].copy()
major_replace_dict = {
    "Mechanical Engineering": "Mechanical Engineering",
    "Applied Mathematics": "Applied Mathematics",
    "Statistics": "Statistics",
    "Ecology & Resource Mgmt": "Ecology & Resource Mgmt",
    "Health Economics and Outcomes": "Health Economics and Outcomes",
    "Finance & Risk Management": "Finance & Risk Management",
    "Molecular and Cellular Biology": "Molecular and Cellular Biology",
    "Demographic Methods, Sociology": "Sociology & Demography",
    "Biomedical and Health Informatics": "Biomedical and Health Informatics",
}
for key, val in major_replace_dict.items():
    # regex=False: the keys are literal labels, not patterns.
    # na=False: a missing major gives False rather than NaN; a mask containing
    # NaN cannot be used for boolean indexing and would raise here.
    matches_key = majors.str.contains(key, regex=False, na=False)
    majors.loc[matches_key] = val

# One row per major with its number of students. The column ("count") and the
# index ("Major") are named explicitly because later cells refer to both by
# name; this avoids depending on how pandas names a value_counts() result.
major_hist = majors.value_counts().rename("count").rename_axis("Major").to_frame()

In [None]:
# Order rows by ascending count; ties are broken by major name in descending
# order.
sort_keys = ["count", "Major"]
sort_ascending = [True, False]
major_hist = major_hist.sort_values(by=sort_keys, ascending=sort_ascending)

In [None]:
# Horizontal bar chart of the per-major counts, drawn without a legend.
major_hist.plot.barh(y="count", legend=False)

In [None]:

# Donut chart of majors with boxed labels connected to their wedges by lines.
# Adapted from: https://matplotlib.org/stable/gallery/pie_and_polar_charts/pie_and_donut_labels.html
# - the color sequence was picked so the first and last colors are not the same
# - startangle was picked to avoid overlapping major labels
# - figsize also helps keep the labels from overlapping

fig, ax = plt.subplots(figsize=(10, 10), subplot_kw={"aspect": "equal"})

majors = major_hist.index.to_list()
counts = major_hist["count"].to_list()

wedges, texts, pcts = ax.pie(
    counts,
    wedgeprops={"width": 0.5},
    startangle=-18,
    colors=matplotlib.color_sequences["Set3"],
    autopct='%1.1f%%',
)

bbox_props = {"boxstyle": "square,pad=0.3", "fc": "w", "ec": "k", "lw": 0.72}
kw = {
    "arrowprops": {"arrowstyle": "-"},
    "bbox": bbox_props,
    "zorder": 0,
    "va": "center",
}

# Text alignment keyed by the sign of the wedge midpoint's x coordinate:
# right-aligned for negative x, left-aligned for positive x.
align_by_side = {-1: "right", 1: "left"}

for wedge, label in zip(wedges, majors):
    # Angle (in degrees) halfway between the wedge's two edges.
    mid_deg = (wedge.theta2 - wedge.theta1)/2. + wedge.theta1
    mid_rad = np.deg2rad(mid_deg)
    px = np.cos(mid_rad)
    py = np.sin(mid_rad)
    side = np.sign(px)
    # The connector's angle depends on the wedge, so it is set per label.
    kw["arrowprops"]["connectionstyle"] = f"angle,angleA=0,angleB={mid_deg}"
    ax.annotate(
        label,
        xy=(px, py),
        xytext=(1.35*side, 1.4*py),
        horizontalalignment=align_by_side[int(side)],
        **kw
    )

# The figure is written to disk and then closed; call plt.show() here instead
# to display it.
fig.savefig("major_pie_chart.png", bbox_inches="tight")
plt.close(fig)

In [None]:
# Total number of students across all majors, and the percentage that a
# single student represents.
n_students = major_hist["count"].sum()
print("number of students", n_students)
print("min percent:", 100/n_students)