# Does development lead to lower taxes?

## Preprocessing

In [None]:
# import packages
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
from sodapy import Socrata

In [None]:
# import mill rates
# Download dataset 'emyx-j53e' from data.ct.gov. None is passed as the
# client's second argument (the app token in sodapy's API), and at most
# 5000 records are requested -- NOTE(review): confirm the dataset stays
# below that limit, otherwise rows would be missing.
client_mill = Socrata('data.ct.gov', None)
try:
    results_mill = client_mill.get('emyx-j53e', limit=5000)
finally:
    # Release the underlying HTTP session even if the request fails.
    client_mill.close()
df_mill = pd.DataFrame.from_records(results_mill)
# Put the row-display limit back to the pandas default.
pd.reset_option('display.max_rows')

In [None]:
# inspect data
# Print a summary of the mill-rate table (columns, non-null counts, dtypes).
df_mill.info()

In [None]:
# import grand lists
# Download dataset '8rr8-a322' from data.ct.gov. None is passed as the
# client's second argument (the app token in sodapy's API), and at most
# 5000 records are requested -- NOTE(review): confirm the dataset stays
# below that limit, otherwise rows would be missing.
client_grand = Socrata('data.ct.gov', None)
try:
    results_grand = client_grand.get('8rr8-a322', limit=5000)
finally:
    # Release the underlying HTTP session even if the request fails.
    client_grand.close()
df_grand = pd.DataFrame.from_records(results_grand)
# Put the row-display limit back to the pandas default.
pd.reset_option('display.max_rows')

In [None]:
# inspect data
# Print a summary of the grand-list table (columns, non-null counts, dtypes).
df_grand.info()

In [None]:
# create keys
# Build a town + grand-list-year key in each table so the two can be joined.
# Assumes both source columns hold strings, so adding them concatenates
# (e.g. town name followed by year) -- TODO confirm against the raw data.
mill_key = df_mill['municipality'].add(df_mill['grand_list_year'])
df_mill['key'] = mill_key

grand_key = df_grand['town_name'].add(df_grand['grand_list_year'])
df_grand['key'] = grand_key

In [None]:
# join dataframes
# Index both tables by the shared key, keep only keys present in both
# (inner join), and order the result by key. Column names that occur in both
# tables receive the '_mill' / '_grand' suffixes.
mill_by_key = df_mill.set_index('key')
grand_by_key = df_grand.set_index('key')

df_join = mill_by_key.join(
    grand_by_key,
    how='inner',
    lsuffix='_mill',
    rsuffix='_grand',
)
df_join = df_join.sort_values(by=['key'])

In [None]:
# Display the joined table (as the last expression in a notebook cell it is rendered as output).
df_join

In [None]:
# inspect nulls
# Select the rows in which both mill-rate columns are missing, then report
# how many there are.
both_rates_missing = (
    df_join['mill_rate_real_personal'].isnull()
    & df_join['mill_rate'].isnull()
)
mill_rate_real_personal_and_mill_rate_are_null = df_join[both_rates_missing]
print(len(mill_rate_real_personal_and_mill_rate_are_null))

In [None]:
# drop nulls
# Remove, by index label, every row that has no mill rate in either column.
labels_without_rate = mill_rate_real_personal_and_mill_rate_are_null.index
df_drop = df_join.drop(index=labels_without_rate)

In [None]:
# drop Stamford outlier
# Remove the row keyed 'Stamford2019'. errors='ignore' makes the cell safe to
# re-run: df_drop is rebound to its own result, so on a second execution the
# label is already gone and a plain drop() would raise KeyError.
df_drop = df_drop.drop('Stamford2019', errors='ignore')

In [None]:
# new column
# NOTE: df_new_column is the same object as df_drop (no copy is made), so the
# column added below is also visible through df_drop.
df_new_column = df_drop

# mill_real takes mill_rate_real_personal; where that is null, mill_rate is
# used instead. The filled Series is assigned back in one step: calling
# fillna(..., inplace=True) on df['mill_real'] is a chained in-place update,
# which under pandas Copy-on-Write modifies a temporary and leaves the
# DataFrame unchanged.
df_new_column['mill_real'] = df_new_column['mill_rate_real_personal'].fillna(
    df_new_column['mill_rate']
)

In [None]:
# convert columns to numeric
def _to_numeric_if_possible(column):
    """Return *column* converted to a numeric dtype, or unchanged if it cannot be parsed."""
    try:
        return pd.to_numeric(column)
    except (ValueError, TypeError):
        # Same fallback the deprecated errors='ignore' option provided:
        # leave non-numeric columns exactly as they were.
        return column


# Explicit try/except replaces pd.to_numeric(errors='ignore'), which newer
# pandas releases deprecate.
df = df_new_column.apply(_to_numeric_if_possible)

In [None]:
# inspect data
# Average mill rate and average equalized grand list per fiscal year,
# across every row in df.
by_fiscal_year = df.groupby('fiscal_year')
mill_means = by_fiscal_year['mill_real'].mean()
grand_means = by_fiscal_year['total_equalized'].mean()

print('Mill Rate - mean', '\n', mill_means, '\n')
print('Grand List - mean', '\n', grand_means)

In [None]:
# create new dataframe
# Sorted copy of df: rows are grouped by town and, within a town, ordered by
# fiscal year. The percent-change columns added to df_pc rely on this order.
sort_columns = ['town_name', 'fiscal_year']
df_pc = df.sort_values(by=sort_columns)

In [None]:
# new column
# Row-over-row percent change of mill_real, computed separately within each
# town (df_pc is already ordered by town and fiscal year, so "previous row"
# means "previous year of the same town"; a town's first row is NaN).
# groupby().pct_change() returns a float Series aligned row-for-row with
# df_pc. This replaces a per-town loop that concatenated onto a dtype-less
# empty Series and then re-aligned on the (possibly non-unique) key index.
df_pc['mill_real_pct_change'] = df_pc.groupby('town_name')['mill_real'].pct_change()

In [None]:
# new column
# Row-over-row percent change of total_equalized, computed separately within
# each town (df_pc is already ordered by town and fiscal year, so "previous
# row" means "previous year of the same town"; a town's first row is NaN).
# groupby().pct_change() returns a float Series aligned row-for-row with
# df_pc. This replaces a per-town loop that concatenated onto a dtype-less
# empty Series and then re-aligned on the (possibly non-unique) key index.
df_pc['total_equalized_pct_change'] = df_pc.groupby('town_name')['total_equalized'].pct_change()

## Town Data & Graphs

In [None]:
# get user input
# Surrounding whitespace is stripped before title-casing; otherwise an entry
# such as "hartford " would silently match no rows in the cells that filter
# on town_name == town.
town = input('Enter town name: ').strip().title()

In [None]:
# display means
# Yearly averages for the selected town only; the town's rows are filtered
# and grouped once and reused for both columns.
town_by_year = df.loc[df['town_name'] == town].groupby('fiscal_year')

print(town, '- Mill Rate - mean')
print(town_by_year['mill_real'].mean(), '\n')
print(town, '- Grand List - mean')
print(town_by_year['total_equalized'].mean())

In [None]:
# graph data
def _draw_series(axes, series, label, line_color):
    """Plot *series* on *axes*, colouring the y-label and y-ticks to match the line."""
    axes.set_ylabel(label, color=line_color)
    axes.plot(series, color=line_color)
    axes.tick_params(axis='y', labelcolor=line_color)


# Yearly means for the selected town.
town_by_year = df[df.town_name == town].groupby('fiscal_year')
y_mr = town_by_year['mill_real'].mean()
y_gl = town_by_year['total_equalized'].mean()

fig, ax1 = plt.subplots()
fig.suptitle(town)

# Left y-axis: mill rate in red. The x-label is set once, on this axes.
ax1.set_xlabel('Fiscal Year')
_draw_series(ax1, y_mr, 'Mill Rate', 'tab:red')

# Right y-axis: grand list in blue, on a twin axes sharing the same x-axis.
ax2 = ax1.twinx()
_draw_series(ax2, y_gl, 'Grand List', 'tab:blue')

# Without tight_layout the right-hand y-label is slightly clipped.
fig.tight_layout()
plt.show()

In [None]:
# display percent change
# Year-over-year changes (as fractions, not percentages) for the selected town.
town_pc = df_pc[df_pc.town_name == town]

print(town, '- Mill Rate - percent change')
print(town_pc['mill_real_pct_change'], '\n')
print(town, '- Grand List - percent change')
print(town_pc['total_equalized_pct_change'])

In [None]:
# graph data
# Scatter of the selected town's yearly changes, scaled from fractions to
# percentages: grand list on the x-axis, mill rate on the y-axis.
town_pc = df_pc[df_pc.town_name == town]
x = town_pc['total_equalized_pct_change'] * 100
y = town_pc['mill_real_pct_change'] * 100

plt.scatter(x, y)
plt.title(town + ' - Percent Change')
plt.xlabel('Grand List')
plt.ylabel('Mill Rate')

# Red reference lines through the origin split the plot into four quadrants.
plt.axhline(0, color='red')  # horizontal line at y = 0 (no mill-rate change)
plt.axvline(0, color='red')  # vertical line at x = 0 (no grand-list change)
plt.show()

In [None]:
# scenario count
# Count the selected town's rows by the sign of each year-over-year change.
# Rows whose change is NaN (e.g. a town's first year) match no scenario.
town_pc = df_pc[df_pc.town_name == town]
grand_change = town_pc.total_equalized_pct_change
mill_change = town_pc.mill_real_pct_change

grand_up = grand_change > 0
grand_down = grand_change < 0
mill_up = mill_change > 0
mill_down = mill_change < 0
mill_flat = mill_change == 0

q1 = town_pc[grand_down & mill_up]
q2 = town_pc[grand_up & mill_up]
q3 = town_pc[grand_up & mill_down]
q4 = town_pc[grand_down & mill_down]
pos_0 = town_pc[grand_up & mill_flat]
neg_0 = town_pc[grand_down & mill_flat]
zeros = town_pc[(grand_change == 0) & mill_flat]

print(town + " - scenarios")
# The '*' marks the "grand list up, mill rate down" row -- presumably the
# scenario the notebook's title question is about.
print(
    '  grand - mill + ', len(q1), '\n',
    ' grand + mill + ', len(q2), '\n',
    '*grand + mill - ', len(q3), '\n',
    ' grand - mill - ', len(q4), '\n',
    ' grand + mill 0 ', len(pos_0), '\n',
    ' grand - mill 0 ', len(neg_0), '\n',
    ' grand 0 mill 0 ', len(zeros),
)

In [None]:
# graph data
# Correlation between the selected town's two percent-change columns,
# rounded to two decimals and drawn as an annotated heatmap on a fixed
# [-1, 1] colour scale centred at 0.
pct_columns = ['mill_real_pct_change', 'total_equalized_pct_change']
df_cor_town = df_pc.loc[df_pc.town_name == town, pct_columns]
df_matrix = df_cor_town.corr().round(2)

sns.heatmap(
    df_matrix,
    annot=True,
    vmin=-1,
    vmax=1,
    center=0,
    cmap='vlag',
)
plt.title(town + ' - correlations')
plt.yticks(rotation='horizontal')
plt.show()

## State Data & Graphs

In [None]:
# graph data
# Statewide yearly means; one groupby feeds both series.
state_by_year = df.groupby('fiscal_year')
y_mr = state_by_year['mill_real'].mean()
y_gl = state_by_year['total_equalized'].mean()

mill_color = 'tab:red'
grand_color = 'tab:blue'

fig, ax1 = plt.subplots()
fig.suptitle('All Towns')

# Left y-axis: mean mill rate. The x-label is set once, on this axes.
ax1.set_xlabel('Fiscal Year')
ax1.set_ylabel('Mill Rate', color=mill_color)
ax1.plot(y_mr, color=mill_color)
ax1.tick_params(axis='y', labelcolor=mill_color)

# Right y-axis: mean grand list, on a twin axes sharing the same x-axis.
ax2 = ax1.twinx()
ax2.set_ylabel('Grand List', color=grand_color)
ax2.plot(y_gl, color=grand_color)
ax2.tick_params(axis='y', labelcolor=grand_color)

# Without tight_layout the right-hand y-label is slightly clipped.
fig.tight_layout()
plt.show()

In [None]:
# display percent change
# Year-over-year changes (as fractions, not percentages) for every town.
mill_pct = df_pc['mill_real_pct_change']
grand_pct = df_pc['total_equalized_pct_change']

print('All Towns - Mill Rate - percent change')
print(mill_pct, '\n')
print('All Towns - Grand List - percent change')
print(grand_pct)

In [None]:
# graph data
# Scatter of every town's yearly changes, scaled from fractions to
# percentages: grand list on the x-axis, mill rate on the y-axis.
x = df_pc['total_equalized_pct_change'] * 100
y = df_pc['mill_real_pct_change'] * 100

plt.scatter(x, y)
plt.title('All Towns - Percent Change')
plt.xlabel('Grand List')
plt.ylabel('Mill Rate')

# Red reference lines through the origin split the plot into four quadrants.
plt.axhline(0, color='red')  # horizontal line at y = 0 (no mill-rate change)
plt.axvline(0, color='red')  # vertical line at x = 0 (no grand-list change)
plt.show()

In [None]:
# scenario count
# Count all rows by the sign of each year-over-year change. Rows whose
# change is NaN (e.g. a town's first year) match no scenario.
grand_change = df_pc.total_equalized_pct_change
mill_change = df_pc.mill_real_pct_change

grand_up = grand_change > 0
grand_down = grand_change < 0
mill_up = mill_change > 0
mill_down = mill_change < 0
mill_flat = mill_change == 0

q1 = df_pc[grand_down & mill_up]
q2 = df_pc[grand_up & mill_up]
q3 = df_pc[grand_up & mill_down]
q4 = df_pc[grand_down & mill_down]
pos_0 = df_pc[grand_up & mill_flat]
neg_0 = df_pc[grand_down & mill_flat]
zeros = df_pc[(grand_change == 0) & mill_flat]

print('All Towns - scenarios')
# The '*' marks the "grand list up, mill rate down" row -- presumably the
# scenario the notebook's title question is about.
print(
    '  grand - mill + ', len(q1), '\n',
    ' grand + mill + ', len(q2), '\n',
    '*grand + mill - ', len(q3), '\n',
    ' grand - mill - ', len(q4), '\n',
    ' grand + mill 0 ', len(pos_0), '\n',
    ' grand - mill 0 ', len(neg_0), '\n',
    ' grand 0 mill 0 ', len(zeros),
)

In [None]:
# graph data
# Correlation between the two percent-change columns over all towns,
# rounded to two decimals and drawn as an annotated heatmap on a fixed
# [-1, 1] colour scale centred at 0.
pct_columns = ['mill_real_pct_change', 'total_equalized_pct_change']
df_cor_state = df_pc[pct_columns]
df_matrix = df_cor_state.corr().round(2)

sns.heatmap(
    df_matrix,
    annot=True,
    vmin=-1,
    vmax=1,
    center=0,
    cmap='vlag',
)
plt.title('All Towns - correlations')
plt.yticks(rotation='horizontal')
plt.show()