# 2024 week 6: Staff Income Tax


https://preppindata.blogspot.com/2024/02/2024-week-6-staff-income-tax.html


## Solution


In [213]:
import pandas as pd

pd.options.mode.copy_on_write = True

In [214]:
# Import input data
df = pd.read_csv("data/input.csv")
df

Unnamed: 0,StaffID,1,2,3,4,5,6,7,8,9,10,11,12
0,1533,2398.0,2421.98,2446.20,2446.20,2495.12,2495.12,2495.12,2495.12,2545.03,2621.38,2621.38,2621.38
1,1339,7304.0,7523.12,7673.58,7673.58,7750.32,7827.82,8062.66,8304.54,8470.63,8555.33,8555.33,8726.44
2,2291,8240.0,8404.80,8572.90,8744.35,8831.80,9096.75,9278.69,9464.26,9464.26,9464.26,9558.90,9558.90
3,2038,3908.0,3986.16,3986.16,4026.02,4066.28,4188.27,4313.92,4443.34,4487.77,4622.40,4668.63,4715.31
4,2810,3988.0,4107.64,4148.72,4190.20,4274.01,4316.75,4316.75,4359.92,4490.71,4535.62,4671.69,4718.41
...,...,...,...,...,...,...,...,...,...,...,...,...,...
994,2959,9163.0,9163.00,9163.00,9163.00,9437.89,9626.65,9915.45,10014.60,10014.60,10014.60,10214.89,10521.34
995,1467,1928.0,1985.84,2045.42,2086.32,2128.05,2170.61,2235.73,2258.09,2303.25,2372.35,2396.07,2443.99
996,2582,5343.0,5449.86,5558.86,5614.45,5726.73,5898.54,6016.51,6197.00,6320.94,6320.94,6320.94,6320.94
997,1779,11138.0,11472.14,11816.30,11816.30,11934.47,12173.16,12538.35,12789.12,13172.79,13304.52,13304.52,13570.61


In [215]:
# Find the latest salaries of each person
df = df.groupby("StaffID", as_index=False).last()
df

Unnamed: 0,StaffID,1,2,3,4,5,6,7,8,9,10,11,12
0,1000,13416.0,13550.16,13685.66,13822.52,13960.74,14239.96,14667.16,14960.50,15110.11,15110.11,15261.21,15413.82
1,1001,12518.0,12518.00,12893.54,13151.41,13545.95,13681.41,14091.86,14514.61,14950.05,14950.05,14950.05,15099.55
2,1007,2134.0,2176.68,2220.21,2264.62,2264.62,2287.26,2310.14,2333.24,2403.24,2475.33,2524.84,2575.34
3,1010,8260.0,8507.80,8763.03,9025.93,9025.93,9206.44,9390.57,9484.48,9769.01,9769.01,9769.01,10062.08
4,1012,8669.0,8755.69,9018.36,9288.91,9381.80,9475.62,9759.89,9955.08,10154.19,10154.19,10458.81,10667.99
...,...,...,...,...,...,...,...,...,...,...,...,...,...
798,2994,8459.0,8712.77,8712.77,8887.03,8887.03,9153.64,9153.64,9428.25,9711.09,10002.43,10302.50,10302.50
799,2995,10672.0,10672.00,10672.00,10778.72,10994.29,11104.24,11437.36,11437.36,11551.74,11782.77,11782.77,11782.77
800,2997,2436.0,2509.08,2534.17,2610.20,2636.30,2662.66,2715.91,2743.07,2797.93,2825.91,2910.69,2998.01
801,2998,1889.0,1889.00,1945.67,1965.13,2024.08,2024.08,2084.80,2126.50,2147.76,2169.24,2169.24,2212.63


In [216]:
# Find each person's annual salary
df["annual_salary"] = df.sum(1)
# df = pd.DataFrame(df.annual_salary)
df = df[["StaffID", "annual_salary"]]
df

Unnamed: 0,StaffID,annual_salary
0,1000,174197.95
1,1001,167865.48
2,1007,28976.52
3,1010,112043.29
4,1012,116751.53
...,...,...
798,2994,114706.65
799,2995,137663.02
800,2997,35376.93
801,2998,27645.13


In [217]:
# Set up UK income tax brackets
data = {
    "taxable_income_lower_bound": [
        0,
        12571,
        50271,
        125141,
    ],
    "tax_rate": [
        0,
        0.2,
        0.4,
        0.45,
    ],
}
tax_brackets = (
    pd.DataFrame(data)
    .sort_values(["taxable_income_lower_bound"])
    .reset_index(names="tax_rate_index")
)
tax_brackets

Unnamed: 0,tax_rate_index,taxable_income_lower_bound,tax_rate
0,0,0,0.0
1,1,12571,0.2
2,2,50271,0.4
3,3,125141,0.45


In [218]:
tax_brackets["tax_rate_percentage"] = tax_brackets.tax_rate.map(
    lambda x: f"{x:.0%} rate"
)
tax_brackets

Unnamed: 0,tax_rate_index,taxable_income_lower_bound,tax_rate,tax_rate_percentage
0,0,0,0.0,0% rate
1,1,12571,0.2,20% rate
2,2,50271,0.4,40% rate
3,3,125141,0.45,45% rate


In [219]:
# Identify maximum tax rates
df["staff_tax_rate_index"] = pd.Series(
    tax_brackets.taxable_income_lower_bound.searchsorted(df.annual_salary) - 1,
    index=df.index,
)
df.merge(tax_brackets, left_on="staff_tax_rate_index", right_index=True)

Unnamed: 0,StaffID,annual_salary,staff_tax_rate_index,tax_rate_index,taxable_income_lower_bound,tax_rate,tax_rate_percentage
0,1000,174197.95,3,3,125141,0.45,45% rate
1,1001,167865.48,3,3,125141,0.45,45% rate
2,1007,28976.52,1,1,12571,0.20,20% rate
3,1010,112043.29,2,2,50271,0.40,40% rate
4,1012,116751.53,2,2,50271,0.40,40% rate
...,...,...,...,...,...,...,...
798,2994,114706.65,2,2,50271,0.40,40% rate
799,2995,137663.02,3,3,125141,0.45,45% rate
800,2997,35376.93,1,1,12571,0.20,20% rate
801,2998,27645.13,1,1,12571,0.20,20% rate


In [220]:
x = df.merge(tax_brackets, how="cross")
x[x.tax_rate_index <= x.staff_tax_rate_index]

Unnamed: 0,StaffID,annual_salary,staff_tax_rate_index,tax_rate_index,taxable_income_lower_bound,tax_rate,tax_rate_percentage
0,1000,174197.95,3,0,0,0.00,0% rate
1,1000,174197.95,3,1,12571,0.20,20% rate
2,1000,174197.95,3,2,50271,0.40,40% rate
3,1000,174197.95,3,3,125141,0.45,45% rate
4,1001,167865.48,3,0,0,0.00,0% rate
...,...,...,...,...,...,...,...
3204,2998,27645.13,1,0,0,0.00,0% rate
3205,2998,27645.13,1,1,12571,0.20,20% rate
3208,2999,88975.30,2,0,0,0.00,0% rate
3209,2999,88975.30,2,1,12571,0.20,20% rate
