In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import warnings

In [2]:
# Silence every warning raised from this point on. No category, message or
# module filter is given, so deprecation notices from the imported libraries
# are hidden as well.
warnings.filterwarnings("ignore")

In [4]:
def counts(df, bycolumn):
    """Show a bar chart of the number of rows per value of ``bycolumn``.

    Every bar is shaded with the next colour of seaborn's "Blues" palette and
    labelled with its count as a whole number. The figure is displayed
    immediately with ``plt.show()``.

    Parameters
    ----------
    df : pandas.DataFrame
        Data passed to ``sns.countplot``.
    bycolumn : str
        Name of the column whose values are counted (plotted on the x axis).

    Returns
    -------
    None
    """
    fig, ax = plt.subplots(figsize=(12, 6))
    sns.countplot(data=df, x=bycolumn, ax=ax)

    bars = ax.patches
    colors = sns.color_palette("Blues", len(bars))

    # Single pass: recolour and label each bar together.
    for bar, color in zip(bars, colors):
        bar.set_color(color)

        height = bar.get_height()
        # A bar without a usable height cannot be positioned or labelled.
        # NOTE(review): seaborn appears to emit NaN heights for categories
        # with no rows in some versions - confirm.
        if pd.isna(height):
            continue

        ax.annotate(
            # Bar heights may come back as floats; ".0f" keeps the label
            # an integer count ("12", not "12.0").
            f'{height:.0f}',
            (bar.get_x() + bar.get_width() / 2., height),
            ha='center', va='bottom'
        )

    ax.set_title(f"Count by {bycolumn}")
    plt.show()

    return

In [6]:
def salary_pie(df, kpi, bycolumn):
    """Show a pie chart of each group's share of the summed ``kpi``.

    Rows of ``df`` are grouped by ``bycolumn`` and ``kpi`` is summed per
    group; each wedge is labelled with its group key and its percentage of
    the total. Wedges start at 90 degrees and run clockwise. The figure is
    displayed immediately.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data.
    kpi : str
        Name of the column to sum.
    bycolumn : str
        Name of the column to group by.
    """
    group_sums = df.groupby(bycolumn)[kpi].sum()

    fig, ax = plt.subplots(figsize=(12, 8))
    wedge_options = dict(
        labels=group_sums.index,
        autopct='%1.1f%%',
        startangle=90,
        counterclock=False,
    )
    ax.pie(group_sums, **wedge_options)

    ax.set_title(f"Share of {kpi} by {bycolumn}")
    fig.tight_layout()
    plt.show()

In [8]:
def salary_bars(df, kpi, bycolumn):
    """Show a bar chart of the mean ``kpi`` per group, highest mean first.

    Rows of ``df`` are grouped by ``bycolumn`` and ``kpi`` is averaged per
    group. Bars are drawn in descending order of that mean and labelled with
    the value rounded to a whole number with thousands separators. The figure
    is displayed immediately.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data.
    kpi : str
        Name of the numeric column to average.
    bycolumn : str
        Name of the column to group by (plotted on the x axis).

    Returns
    -------
    None
    """
    means = (
        df.groupby(bycolumn)[kpi]
        .mean()
        .reset_index()
        .sort_values(by=kpi, ascending=False)
    )

    fig, ax = plt.subplots(figsize=(12, 6))
    sns.barplot(
        data=means,
        x=bycolumn,
        y=kpi,
        palette="Blues",
        # Pass the sorted order explicitly: without it seaborn picks its own
        # category order (e.g. sorted values for numeric group keys), which
        # would discard the descending-by-mean sort above.
        order=means[bycolumn].tolist(),
        ax=ax,
    )

    for bar in ax.patches:
        height = bar.get_height()
        # Groups whose mean is NaN have no bar to label.
        if pd.isna(height):
            continue
        ax.annotate(
            f'{height:,.0f}',
            (bar.get_x() + bar.get_width() / 2., height),
            ha='center', va='bottom'
        )

    ax.set_title(f"Avg. of {kpi} by {bycolumn}")
    fig.tight_layout()
    plt.show()

    return

In [10]:
def stacked_bar_100(df, x_col, stack_col):
    """Show a 100 % stacked bar chart of ``stack_col`` within each ``x_col`` value.

    Rows of ``df`` are cross-tabulated by ``x_col`` and ``stack_col``; each
    ``x_col`` category becomes one bar whose segments are the percentage of
    its rows falling in each ``stack_col`` category. Non-zero segments are
    labelled with their percentage. The figure is displayed immediately.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data.
    x_col : str
        Name of the column that defines the bars (x axis).
    stack_col : str
        Name of the column that defines the stacked segments (legend).

    Returns
    -------
    None
    """
    # Rows: x_col categories; columns: stack_col categories; cells: row counts.
    cell_counts = pd.crosstab(df[x_col], df[stack_col])

    # Normalise each row so its segments sum to 100.
    percentages = cell_counts.div(cell_counts.sum(axis=1), axis=0) * 100

    colors = sns.color_palette("Blues", n_colors=percentages.shape[1])

    # Create exactly one figure and draw into it. Opening a figure with
    # plt.figure() and then letting DataFrame.plot(figsize=...) open its own
    # leaves an extra, empty figure behind. 10x6 is the size the chart was
    # actually rendered at.
    fig, ax = plt.subplots(figsize=(10, 6))
    percentages.plot(
        kind='bar',
        stacked=True,
        color=colors,
        rot=0,
        ax=ax
    )

    # Bars sit at x = 0, 1, 2, ... in row order; walk up each bar and put the
    # label at the vertical midpoint of every non-empty segment.
    for bar_pos, (_, row) in enumerate(percentages.iterrows()):
        bottom = 0
        for pct in row:
            if pct > 0:  # only annotate if non-zero
                ax.text(
                    bar_pos,
                    bottom + pct / 2,
                    f'{pct:.1f}%',
                    ha='center',
                    va='center',
                    color='white',
                    fontsize=10,
                    fontweight='bold'
                )
            bottom += pct

    ax.set_ylabel('Percentage (%)')
    ax.set_title(f'100% Stacked Bar of {stack_col} by {x_col}')
    ax.legend(title=stack_col, bbox_to_anchor=(1.05, 1), loc='upper left')
    fig.tight_layout()
    plt.show()

    return

In [12]:
def scatterplot(df, x_axis, y_axis, bycolumn):
    """Show a scatter plot of ``y_axis`` against ``x_axis``, coloured by ``bycolumn``.

    The plot is drawn with ``sns.scatterplot`` on a 12x6 figure and displayed
    immediately.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data.
    x_axis : str
        Name of the column plotted on the x axis.
    y_axis : str
        Name of the column plotted on the y axis.
    bycolumn : str
        Name of the column mapped to point colour (``hue``).

    Returns
    -------
    None
    """
    fig, ax = plt.subplots(figsize=(12, 6))

    point_mapping = {"x": x_axis, "y": y_axis, "hue": bycolumn}
    sns.scatterplot(data=df, ax=ax, **point_mapping)

    plt.show()
    return None