In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import geopandas as gpd
from shapely.geometry import Polygon

We compiled the US States (and Washington DC) tax rates into one dataset from 2000-2023 based on the available information from the US Government. Then we added added it as a pandas DataFrame and rounded the numbers. 

In [2]:
df = pd.read_csv("taxrates.csv")
df = df.rename(columns={"State":"NAME", "STUSPS": "STATE"}).round(2)
df.head()

Unnamed: 0,NAME,STATE,2000,2001,2002,2003,2004,2005,2006,2007,...,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023
0,Alabama,AL,5.0,5.0,6.0,5.0,6.5,6.5,6.5,6.5,...,6.5,6.5,6.5,6.5,6.5,6.5,6.5,6.5,6.5,6.5
1,Alaska,AK,5.2,5.2,5.44,5.44,5.44,5.44,5.44,5.44,...,5.44,5.34,5.34,5.34,5.34,5.34,5.34,5.34,5.34,5.34
2,Arizona,AZ,8.0,7.97,6.97,6.97,6.97,6.97,6.97,6.97,...,6.5,5.5,5.5,4.9,4.9,4.9,4.9,4.9,4.9,4.9
3,Arkansas,AR,3.75,3.75,3.92,3.92,3.92,3.92,3.92,3.92,...,3.92,3.92,3.92,3.92,3.92,3.92,3.92,3.87,3.65,3.55
4,California,CA,8.84,8.84,8.84,8.84,8.84,8.84,8.84,8.84,...,8.84,8.84,8.84,8.84,8.84,8.84,8.84,8.84,8.84,8.84


To get an idea of the changes in the tax rates of each state, we transpose the data and plot a line graph using matplotlib.

In [3]:
df_t = df.T 
df_t.columns=df_t.iloc[0]
df_t = df_t.tail(-2)
df_t.head()


NAME,Alabama,Alaska,Arizona,Arkansas,California,Colorado,Connecticut,Delaware,Florida,Georgia,...,Tennessee,Texas,Utah,Vermont,Virginia,Washington,West Virginia,Wisconsin,Wyoming,District of Columbia
2000,5.0,5.2,8.0,3.75,8.84,4.75,8.5,8.7,5.5,6.0,...,6.0,0.0,5.0,8.38,6.0,0.0,9.0,7.9,0.0,9.98
2001,5.0,5.2,7.97,3.75,8.84,4.63,7.5,8.7,5.5,6.0,...,6.0,0.0,5.0,8.38,6.0,0.0,9.0,7.9,0.0,9.98
2002,6.0,5.44,6.97,3.92,8.84,4.63,7.5,8.7,5.5,6.0,...,6.0,0.0,5.0,8.51,6.0,0.0,9.0,7.9,0.0,9.5
2003,5.0,5.44,6.97,3.92,8.84,4.63,7.5,8.7,5.5,6.0,...,6.0,0.0,5.0,8.51,6.0,0.0,9.0,7.9,0.0,9.5
2004,6.5,5.44,6.97,3.92,8.84,4.63,7.5,8.7,5.5,6.0,...,6.5,0.0,5.0,8.51,6.0,0.0,9.0,7.9,0.0,9.98


In [None]:
# plot the data
ax = df_t.plot(figsize=(40,15))

# set x-axis label and values
ax.set_xlabel("Time")
ax.set_ylabel("Tax rate(%)")

plt.show()

In [None]:
shapefile = 's_08mr23/s_08mr23.shp'
gdf = gpd.read_file(shapefile)
gdf

In [None]:
#drop Puerto rico, Fed States of Micronesia, American Samoa, US Virgin Islands, Guam, Commonwealth of the northern mariana islands from file
gdf = gdf.drop(index=[2,37,44,52,54,55,56,57])
gdf = gdf.reset_index(drop=True)

In [None]:
len(gdf)

In [None]:
gdf.plot()

Creating the merged dataset that will include the Tax rate as a column

In [None]:
df_test = df[['STATE','2022']].copy()
df_test.head()

We are going to clip Hawaii and Alaska and add them separately. 

In [None]:
alaska_gdf = gdf[gdf.STATE=='AK']
alaska_gdf = alaska_gdf.merge(df_test, on="STATE")
#clipping some of the western islands to make it fit better
polygon = Polygon([(-170,50),(-170,72),(-140, 72),(-140,50)])
alaska_gdf = alaska_gdf.clip(polygon)
alaska_gdf
alaska_gdf.plot()

In [None]:
hawaii_gdf = gdf[gdf.STATE=='HI']
hawaii_gdf = hawaii_gdf.merge(df_test, on="STATE")
hawaii_gdf
hawaii_gdf.plot()

Removing Alaska and Hawaii from the table due to mapping errors. Will add back later separately.

In [None]:
gdf_test = gdf.drop(gdf[gdf['STATE'] == 'AK'].index)
gdf_test = gdf_test.drop(gdf_test[gdf_test['STATE'] == 'HI'].index)
len(gdf_test)

Creating the merged dataset that will include the Tax rate as a column

In [None]:
df_test = df[['STATE','2022']].copy()
df_test.head()

In [None]:
gdf_new = gdf_test.merge(df_test, on="STATE")
gdf_new

Plotting the values on a geographical map we get a heatmap:

In [None]:
# Print the map
# Set the range for the choropleth
title = 'Tax rate in 2022'
col = '2022'
vmin = gdf_new[col].min()
vmax = gdf_new[col].max()
cmap = 'GnBu'
# Create figure and axes for Matplotlib
fig, ax = plt.subplots(1, figsize=(20, 8))
# Remove the axis
ax.axis('off')
gdf_new.plot(column=col, ax=ax, edgecolor='0.8', linewidth=1, cmap=cmap, legend=True)
# Add a title
ax.set_title(title, fontdict={'fontname': 'Times New Roman','fontsize': '25', 'fontweight': '3'})
# Create an annotation for the data source

#Add Alaska
akax = fig.add_axes([0.1, 0.17, 0.2, 0.19])   
akax.axis('off')
alaska_gdf.plot(column=alaska_gdf["2022"], cmap="GnBu", ax=akax, linewidth=1, edgecolor='0.8')

#Add Hawaii
hiax = fig.add_axes([.28, 0.20, 0.1, 0.1])      
hiax.axis('off')
hawaii_gdf.plot(column=hawaii_gdf["2022"], cmap="GnBu", ax=hiax, linewidth=1, edgecolor='0.8')



In [None]:
from matplotlib.widgets import Button, Slider

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.widgets import Slider

x = np.linspace(0, 3, 300)
y = np.sin(5 * np.pi * x)

fig, ax = plt.subplots()
l, = ax.plot(x, y)
plt.show()

In [None]:
fig.subplots_adjust(bottom=0.2)


In [None]:
x = np.linspace(0, 3, 300)
y = np.sin(5 * np.pi * x)

fig, ax = plt.subplots()
fig.subplots_adjust(bottom=0.2)
l, = ax.plot(x, y)

def onChange(value):
    l.set_ydata(np.sin(value * np.pi * x))
    fig.canvas.draw_idle()

slideraxis = fig.add_axes([0.25, 0.1, 0.65, 0.03])
slider = Slider(slideraxis, label='Frequency [Hz]',
                valmin=0, valmax=10, valinit=0)
slider.on_changed(onChange)
plt.show()

In [None]:
from ipywidgets import interact, interactive, fixed, interact_manual


In [None]:
%matplotlib notebook