In [8]:
import random
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sb
import numpy as np
import pandas as pd
import time
import timeit

In [9]:
import warnings
# Notebook-wide display settings:
#   * every warning is silenced for the rest of the session;
#   * NumPy arrays print with 3 decimals and without scientific notation.
warnings.filterwarnings(action='ignore')
np.set_printoptions(suppress=True, precision=3)

In [10]:
# Read the clustered county table from the working directory into `df`.
df = pd.read_csv(filepath_or_buffer="FIPS_Counties_Clustered.csv")

In [11]:
# Discard the first column of the freshly loaded frame (presumably the index
# column written out when the CSV was saved -- confirm against the file).
# Reassigning instead of `inplace=True` keeps the step explicit and chainable.
# NOTE: the column is chosen by position, so running this cell a second time
# without re-running the load cell removes one more column.
df = df.drop(columns=df.columns[0])

# SAVE SOME GLOBAL VARIABLES
# Column labels as they stand right after the drop above.
DF_COL_NAMES = df.columns

# Identifier columns kept as standalone Series.
FIPS = df['FIPS']
STATE = df["State"]
COUNTY = df["County"]

# Preview the first rows as the cell output.
df.head()

Unnamed: 0,% Fair/Poor,Physically Unhealthy Days,Mentally Unhealthy Days,% LBW,% Smokers,% Obese,Food Environment Index,% Physically Inactive,% With Access,% Excessive Drinking,...,KPCA_5,KM_UMAP,KM_LLE,KM_tSNE,KM_ISO,KM_PCA,KM_KPCA,FIPS,State,County
0,0.110673,0.389001,0.599601,-0.0461,0.30875,1.277396,-0.229372,1.01294,0.264421,-0.125322,...,-0.037946,3,3,2,4,0,2,1001,Alabama,Autauga
1,0.110673,0.249076,0.436841,-0.0461,-0.236706,-0.244628,0.458913,-0.338015,0.39442,-0.125322,...,0.142256,4,1,2,4,3,2,1003,Alabama,Baldwin
2,1.810044,1.648322,1.087881,1.410135,1.126935,2.581988,-1.605941,0.433959,-0.385571,-1.35104,...,-0.053751,0,4,4,1,4,1,1005,Alabama,Barbour
3,0.535516,0.66885,0.599601,1.410135,0.581479,1.277396,0.114771,1.784914,-2.032219,-0.431752,...,0.008361,0,3,4,1,0,3,1007,Alabama,Bibb
4,0.747937,0.808775,1.250641,-0.0461,0.581479,0.407668,0.889091,0.626953,-1.728889,-0.738181,...,0.128077,0,3,4,1,0,3,1009,Alabama,Blount


In [12]:
def recode_col(df, existing_col, mapping_dict, new_col):
    """Add (or overwrite) a column holding recoded values of another column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to modify; the new column is written onto this object.
    existing_col : str
        Label of the column whose values are looked up.
    mapping_dict : dict
        Lookup passed to ``Series.map`` (old value -> new value).
    new_col : str
        Label of the column that receives the mapped values.

    Returns
    -------
    None
        The frame is modified in place; nothing is returned.
    """
    source_values = df[existing_col]
    recoded = source_values.map(mapping_dict)
    df[new_col] = recoded


# Lookup from the full name stored in the "State" column to its two-letter
# postal abbreviation (50 states, DC and three territories).
states_dict = {
    "Alabama": "AL", "Alaska": "AK", "Arizona": "AZ",
    "Arkansas": "AR", "California": "CA", "Colorado": "CO",
    "Connecticut": "CT", "Delaware": "DE", "District of Columbia": "DC",
    "Florida": "FL", "Georgia": "GA", "Hawaii": "HI",
    "Idaho": "ID", "Illinois": "IL", "Indiana": "IN",
    "Iowa": "IA", "Kansas": "KS", "Kentucky": "KY",
    "Louisiana": "LA", "Maine": "ME", "Maryland": "MD",
    "Massachusetts": "MA", "Michigan": "MI", "Minnesota": "MN",
    "Mississippi": "MS", "Missouri": "MO", "Montana": "MT",
    "Nebraska": "NE", "Nevada": "NV", "New Hampshire": "NH",
    "New Jersey": "NJ", "New Mexico": "NM", "New York": "NY",
    "North Carolina": "NC", "North Dakota": "ND", "Ohio": "OH",
    "Oklahoma": "OK", "Oregon": "OR", "Pennsylvania": "PA",
    "Rhode Island": "RI", "South Carolina": "SC", "South Dakota": "SD",
    "Tennessee": "TN", "Texas": "TX", "Utah": "UT",
    "Vermont": "VT", "Virginia": "VA", "Washington": "WA",
    "West Virginia": "WV", "Wisconsin": "WI", "Wyoming": "WY",
    "Guam": "GU", "Puerto Rico": "PR", "Virgin Islands": "VI",
}

# Write the abbreviations onto `df` as a new "StateAbbr" column.
recode_col(df, existing_col="State", mapping_dict=states_dict, new_col="StateAbbr")


In [13]:
# Build one row per state holding the most frequent county-level KM_UMAP label.
# The map in the next cell is keyed on state abbreviations, so the previous
# one-row-per-county table gave every state many competing colour values
# instead of a single, well-defined one.
map_df_dup = df.copy()  # work on a copy so `df` itself is left untouched
mapme = (
    map_df_dup
    # Rows lacking an abbreviation or a label cannot be placed or counted.
    .dropna(subset=["StateAbbr", "KM_UMAP"])
    .groupby(["State", "StateAbbr"])["KM_UMAP"]
    # Series.mode() returns the modes sorted, so ties go to the smallest label.
    .agg(lambda labels: labels.mode().iloc[0])
    .reset_index()
)

In [14]:
import plotly.express as px
from textwrap import wrap

named_colorscales = px.colors.named_colorscales()
#print("\n".join(wrap("".join('{:<12}'.format(c) for c in named_colorscales), 96)))

# Colour a US map by the KM_UMAP values in `mapme`, matching rows to states
# through their two-letter abbreviations.
fig = px.choropleth(
    data_frame=mapme,
    locations="StateAbbr",            # column holding the state abbreviations
    locationmode="USA-states",        # interpret `locations` as US states
    color="KM_UMAP",                  # column that drives the fill colour
    color_continuous_scale="turbo",
    hover_name="State",               # bold heading of the hover box
)
# Title the figure and restrict the view to the USA instead of the globe.
fig.update_layout(title_text="UMAP Groups by State", geo_scope="usa")
fig.show()

In [15]:
# Build one row per state holding the most frequent county-level KM_LLE label.
# The map below is keyed on state abbreviations, so the previous
# one-row-per-county table gave every state many competing colour values
# instead of a single, well-defined one.
map_df_dup = df.copy()  # work on a copy so `df` itself is left untouched
mapme = (
    map_df_dup
    # Rows lacking an abbreviation or a label cannot be placed or counted.
    .dropna(subset=["StateAbbr", "KM_LLE"])
    .groupby(["State", "StateAbbr"])["KM_LLE"]
    # Series.mode() returns the modes sorted, so ties go to the smallest label.
    .agg(lambda labels: labels.mode().iloc[0])
    .reset_index()
)

fig = px.choropleth(
    mapme,                            # one row per state
    locations="StateAbbr",            # column holding the state abbreviations
    locationmode="USA-states",        # interpret `locations` as US states
    color="KM_LLE",                   # column that drives the fill colour
    color_continuous_scale="turbo",
    hover_name="State",               # bold heading of the hover box
)
# Title the figure and restrict the view to the USA instead of the globe.
fig.update_layout(title_text="LLE Groups by State", geo_scope="usa")
fig.show()

In [16]:
# Build one row per state holding the most frequent county-level KM_tSNE label.
# The map below is keyed on state abbreviations, so the previous
# one-row-per-county table gave every state many competing colour values
# instead of a single, well-defined one.
map_df_dup = df.copy()  # work on a copy so `df` itself is left untouched
mapme = (
    map_df_dup
    # Rows lacking an abbreviation or a label cannot be placed or counted.
    .dropna(subset=["StateAbbr", "KM_tSNE"])
    .groupby(["State", "StateAbbr"])["KM_tSNE"]
    # Series.mode() returns the modes sorted, so ties go to the smallest label.
    .agg(lambda labels: labels.mode().iloc[0])
    .reset_index()
)

fig = px.choropleth(
    mapme,                            # one row per state
    locations="StateAbbr",            # column holding the state abbreviations
    locationmode="USA-states",        # interpret `locations` as US states
    color="KM_tSNE",                  # column that drives the fill colour
    color_continuous_scale="turbo",
    hover_name="State",               # bold heading of the hover box
)
# Title the figure and restrict the view to the USA instead of the globe.
fig.update_layout(title_text="tSNE Groups by State", geo_scope="usa")
fig.show()

In [17]:
# Build one row per state holding the most frequent county-level KM_ISO label.
# The map below is keyed on state abbreviations, so the previous
# one-row-per-county table gave every state many competing colour values
# instead of a single, well-defined one.
map_df_dup = df.copy()  # work on a copy so `df` itself is left untouched
mapme = (
    map_df_dup
    # Rows lacking an abbreviation or a label cannot be placed or counted.
    .dropna(subset=["StateAbbr", "KM_ISO"])
    .groupby(["State", "StateAbbr"])["KM_ISO"]
    # Series.mode() returns the modes sorted, so ties go to the smallest label.
    .agg(lambda labels: labels.mode().iloc[0])
    .reset_index()
)

fig = px.choropleth(
    mapme,                            # one row per state
    locations="StateAbbr",            # column holding the state abbreviations
    locationmode="USA-states",        # interpret `locations` as US states
    color="KM_ISO",                   # column that drives the fill colour
    color_continuous_scale="turbo",
    hover_name="State",               # bold heading of the hover box
)
# Title the figure and restrict the view to the USA instead of the globe.
fig.update_layout(title_text="ISO Groups by State", geo_scope="usa")
fig.show()

In [18]:
# Build one row per state holding the most frequent county-level KM_PCA label.
# The map below is keyed on state abbreviations, so the previous
# one-row-per-county table gave every state many competing colour values
# instead of a single, well-defined one.
map_df_dup = df.copy()  # work on a copy so `df` itself is left untouched
mapme = (
    map_df_dup
    # Rows lacking an abbreviation or a label cannot be placed or counted.
    .dropna(subset=["StateAbbr", "KM_PCA"])
    .groupby(["State", "StateAbbr"])["KM_PCA"]
    # Series.mode() returns the modes sorted, so ties go to the smallest label.
    .agg(lambda labels: labels.mode().iloc[0])
    .reset_index()
)

fig = px.choropleth(
    mapme,                            # one row per state
    locations="StateAbbr",            # column holding the state abbreviations
    locationmode="USA-states",        # interpret `locations` as US states
    color="KM_PCA",                   # column that drives the fill colour
    color_continuous_scale="turbo",
    hover_name="State",               # bold heading of the hover box
)
# Title the figure and restrict the view to the USA instead of the globe.
fig.update_layout(title_text="PCA Groups by State", geo_scope="usa")
fig.show()

In [19]:
# Build one row per state holding the most frequent county-level KM_KPCA label.
# The map below is keyed on state abbreviations, so the previous
# one-row-per-county table gave every state many competing colour values
# instead of a single, well-defined one.
map_df_dup = df.copy()  # work on a copy so `df` itself is left untouched
mapme = (
    map_df_dup
    # Rows lacking an abbreviation or a label cannot be placed or counted.
    .dropna(subset=["StateAbbr", "KM_KPCA"])
    .groupby(["State", "StateAbbr"])["KM_KPCA"]
    # Series.mode() returns the modes sorted, so ties go to the smallest label.
    .agg(lambda labels: labels.mode().iloc[0])
    .reset_index()
)

fig = px.choropleth(
    mapme,                            # one row per state
    locations="StateAbbr",            # column holding the state abbreviations
    locationmode="USA-states",        # interpret `locations` as US states
    color="KM_KPCA",                  # column that drives the fill colour
    color_continuous_scale="turbo",
    hover_name="State",               # bold heading of the hover box
)
# Title the figure and restrict the view to the USA instead of the globe.
fig.update_layout(title_text="KPCA Groups by State", geo_scope="usa")
fig.show()