In [1]:
import pandas as pd
import re

In [2]:
county_data = pd.read_csv("Data/county_data.csv", skiprows = [1])

In [18]:
county_data.columns.to_list()

['State FIPS Code',
 'County FIPS Code',
 '5-digit FIPS Code',
 'State Abbreviation',
 'Name',
 'Premature death raw value',
 'Premature death (Black)',
 'Premature death (Hispanic)',
 'Premature death (White)',
 'Poor or fair health raw value',
 'Poor physical health days raw value',
 'Poor mental health days raw value',
 'Low birthweight raw value',
 'Low birthweight (Black)',
 'Low birthweight (Hispanic)',
 'Low birthweight (White)',
 'Adult smoking raw value',
 'Adult obesity raw value',
 'Food environment index raw value',
 'Physical inactivity raw value',
 'Access to exercise opportunities raw value',
 'Excessive drinking raw value',
 'Alcohol-impaired driving deaths raw value',
 'Sexually transmitted infections raw value',
 'Teen births raw value',
 'Teen births (Black)',
 'Teen births (Hispanic)',
 'Teen births (White)',
 'Uninsured raw value',
 'Primary care physicians raw value',
 'Ratio of population to primary care physicians.',
 'Dentists raw value',
 'Ratio of population 

In [4]:
county_data = county_data.rename(columns = { "Uninsured adults numerator": "uninsured_adults_raw", 
                                             "Uninsured adults denominator": "adult_population_18_64"})


## Removing Columns 

In [5]:
def del_col(df, suffixes):
    for suffix in suffixes:
        for col in df.columns:
            if suffix in col:
                del df[col]
    return df

In [6]:
county_data_new = del_col(county_data, ["numerator", "denominator", "CI low", "CI high", "County Ranked", "Release Year"])

## Renaming Columns

In [7]:
column_names = {}
for col_name in county_data_new.columns:
    col_name_clean = re.sub(r"[()\"#/@;:<>{}`+=~|.!?,]", " ", col_name)
    col_name_clean = col_name_clean.replace("-"," ").replace("raw value","").lower()
    col_name_clean = '_'.join(col_name_clean.split()) 
    column_names[col_name] = col_name_clean
county_data_new = county_data_new.rename(columns=column_names)

In [8]:
county_data_new.columns = county_data_new.columns.str.replace("%", "percent")

In [9]:
county_data_new.to_csv("Data/county_data_clean.csv")

In [19]:
county_data_new.columns.to_list()

['state_fips_code',
 'county_fips_code',
 '5_digit_fips_code',
 'state_abbreviation',
 'name',
 'premature_death',
 'premature_death_black',
 'premature_death_hispanic',
 'premature_death_white',
 'poor_or_fair_health',
 'poor_physical_health_days',
 'poor_mental_health_days',
 'low_birthweight',
 'low_birthweight_black',
 'low_birthweight_hispanic',
 'low_birthweight_white',
 'adult_smoking',
 'adult_obesity',
 'food_environment_index',
 'physical_inactivity',
 'access_to_exercise_opportunities',
 'excessive_drinking',
 'alcohol_impaired_driving_deaths',
 'sexually_transmitted_infections',
 'teen_births',
 'teen_births_black',
 'teen_births_hispanic',
 'teen_births_white',
 'uninsured',
 'primary_care_physicians',
 'ratio_of_population_to_primary_care_physicians',
 'dentists',
 'ratio_of_population_to_dentists',
 'mental_health_providers',
 'ratio_of_population_to_mental_health_providers',
 'preventable_hospital_stays',
 'preventable_hospital_stays_black',
 'preventable_hospital_stays

## For Tableau 

In [21]:
county_data_tableau = county_data_new.copy()

In [22]:
county_data_tableau.columns = county_data_tableau.columns.str.replace("_", " ")

In [23]:
county_data_tableau.to_csv("Data/county_data_tableau.csv")