In [37]:
import seaborn as sns
import pandas as pd
import geopandas as gpd
import pysal
import numpy as np
import mapclassify
import matplotlib.pyplot as plt
import os
from PIL import Image
import IPython.display as display
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

In [2]:
# get the data
per_cap_income = gpd.read_file('uscountypcincome.gpkg')
per_cap_income.columns

Index(['STATEFP', 'COUNTYFP', 'COUNTYNS', 'GEOID', 'NAME', 'NAMELSAD', 'LSAD',
       'CLASSFP', 'MTFCC', 'CSAFP', 'CBSAFP', 'METDIVFP', 'FUNCSTAT', 'ALAND',
       'AWATER', 'INTPTLAT', 'INTPTLON', 'Unnamed: 0', 'GeoFIPS', 'GeoName',
       'Region', 'TableName', 'LineCode', 'IndustryClassification',
       'Description', 'Unit', '1969', '1970', '1971', '1972', '1973', '1974',
       '1975', '1976', '1977', '1978', '1979', '1980', '1981', '1982', '1983',
       '1984', '1985', '1986', '1987', '1988', '1989', '1990', '1991', '1992',
       '1993', '1994', '1995', '1996', '1997', '1998', '1999', '2000', '2001',
       '2002', '2003', '2004', '2005', '2006', '2007', '2008', '2009', '2010',
       '2011', '2012', '2013', '2014', '2015', '2016', '2017', '2018', '2019',
       '2020', '2021', 'geometry'],
      dtype='object')

In [3]:
# get data shape
per_cap_income.shape

(3090, 80)

In [25]:
# lets first focus on the data from TN
# Filter the data for Tennessee (TN) using the FIPS code 47
tn_per_cap_income = per_cap_income[per_cap_income['STATEFP'] == '47']

# Displaying the first few rows of the Tennessee dataset
tn_per_cap_income.head()

# convert the income data to numeric
years = [str(year) for year in range(1969, 2022)]
tn_per_cap_income[years] = tn_per_cap_income[years].apply(pd.to_numeric)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


#### TN Top Income by Conuty
Lets get the top 5 per capital income per county in the past 10 year


In [26]:
# Selecting the past 10 years
years = [str(year) for year in range(2012, 2022)]

# Getting the top 10 per capita income counties for each year
top_counties = {}
for year in years:
    top_counties[year] = tn_per_cap_income[['NAME', year]].sort_values(by=year, ascending=False).head(5)

top_counties

{'2012':             NAME   2012
 2478  Williamson  79265
 2495    Davidson  49761
 2519     Fayette  45155
 2427    Hamilton  44884
 2492      Shelby  42964,
 '2013':             NAME   2013
 2478  Williamson  80565
 2495    Davidson  49057
 2519     Fayette  45774
 2427    Hamilton  44238
 2492      Shelby  42541,
 '2014':             NAME   2014
 2478  Williamson  85053
 2495    Davidson  52665
 2519     Fayette  46082
 2427    Hamilton  45396
 2492      Shelby  43734,
 '2015':             NAME   2015
 2478  Williamson  92247
 2495    Davidson  54994
 2519     Fayette  49631
 2427    Hamilton  47647
 2480        Knox  45271,
 '2016':             NAME   2016
 2478  Williamson  92784
 2495    Davidson  57717
 2519     Fayette  51651
 2427    Hamilton  48383
 2480        Knox  46138,
 '2017':             NAME   2017
 2478  Williamson  91608
 2495    Davidson  61212
 2519     Fayette  53397
 2427    Hamilton  50148
 2489      Wilson  47614,
 '2018':             NAME   2018
 2478  Willia

Throughout the past decade, Williamson and Davidson consistently rank at the top for per capita income. A few counties, like Hamilton, Knox, and Shelby, also appear frequently in the top 5 list across the years.

#### TN Income Evolution
Now lets take a look at the map, we will plot the per capita income for each county in Tennessee, and see the trend across the past few decades.

In [68]:
# Years from 2000 to 2021
years_2000_2021 = [str(year) for year in range(2000, 2022)]

# Determine the global min and max per capita income values across the years
global_min = tn_per_cap_income[years_2000_2021].min().min()
global_max = tn_per_cap_income[years_2000_2021].max().max()
global_min, global_max

(13416, 107698)

In [69]:
# set path to store the images
curr_path = os.getcwd()
image_path = os.path.join(curr_path, 'images')
if not os.path.exists(image_path):
    os.mkdir(image_path)


In [70]:
filepaths_2000_2021 = []

for year in years_2000_2021:
    fig, ax = plt.subplots(figsize=(10, 4))
    tn_per_cap_income.plot(column=year, ax=ax, cmap='viridis', vmin=global_min, vmax=global_max)
    ax.set_title(f"Per Capita Income by County in Tennessee ({year})")
    ax.set_axis_off()
    
    # Adding a colorbar with the fixed scale
    cax = fig.add_axes([0.9, 0.1, 0.03, 0.8])
    sm = plt.cm.ScalarMappable(cmap='viridis', norm=plt.Normalize(vmin=global_min, vmax=global_max))
    sm._A = []
    cbar = fig.colorbar(sm, cax=cax)
    cbar.set_label(f'Per Capita Income ({year})')
    
    # Save each plot to a file
    filepath = os.path.join(image_path, f"plot_{year}.png")
    plt.savefig(filepath, bbox_inches="tight")
    filepaths_2000_2021.append(filepath)
    plt.close()

In [73]:
# Compile all the images into a GIF
gif_path_2000_2021 = "tn_income_evolution_2010_2021.gif"
with Image.open(filepaths_2000_2021[0]) as img:
    img.save(gif_path_2000_2021, save_all=True, append_images=[Image.open(f) for f in filepaths_2000_2021[1:]], 
             optimize=False, duration=1000, loop=0)

In [74]:
display.Image(url=gif_path_2000_2021)