In [1]:
import pandas as pd
import re

In [2]:
county_data = pd.read_csv("Data/county_data.csv", skiprows = [1])

In [3]:
county_data.columns[0:100]

Index(['State FIPS Code', 'County FIPS Code', '5-digit FIPS Code',
       'State Abbreviation', 'Name', 'Release Year',
       'County Ranked (Yes=1/No=0)', 'Premature death raw value',
       'Premature death numerator', 'Premature death denominator',
       'Premature death CI low', 'Premature death CI high',
       'Premature death (Black)', 'Premature death (Hispanic)',
       'Premature death (White)', 'Poor or fair health raw value',
       'Poor or fair health numerator', 'Poor or fair health denominator',
       'Poor or fair health CI low', 'Poor or fair health CI high',
       'Poor physical health days raw value',
       'Poor physical health days numerator',
       'Poor physical health days denominator',
       'Poor physical health days CI low', 'Poor physical health days CI high',
       'Poor mental health days raw value',
       'Poor mental health days numerator',
       'Poor mental health days denominator', 'Poor mental health days CI low',
       'Poor mental healt

## Removing Columns 

In [4]:
def del_col(df, suffixes):
    for suffix in suffixes:
        for col in df.columns:
            if suffix in col:
                del df[col]
    return df

In [5]:
county_data_new = del_col(county_data, ["numerator", "denominator", "CI low", "CI high", "County Ranked", "Release Year"])

## Renaming Columns

In [6]:
column_names = {}
for col_name in county_data_new.columns:
    col_name_clean = re.sub(r"[()\"#/@;:<>{}`+=~|.!?,]", " ", col_name)
    col_name_clean = col_name_clean.replace("raw value","").replace(" ","_").lower()
    col_name_clean = col_name_clean.replace("-","_").replace("__","_").replace("___","_")
    col_name_clean = col_name_clean.rstrip("_")
    column_names[col_name] = col_name_clean
county_data_new = county_data_new.rename(columns=column_names)

In [7]:
county_data_new.columns[0:100]

Index(['state_fips_code', 'county_fips_code', '5_digit_fips_code',
       'state_abbreviation', 'name', 'premature_death',
       'premature_death_black', 'premature_death_hispanic',
       'premature_death_white', 'poor_or_fair_health',
       'poor_physical_health_days', 'poor_mental_health_days',
       'low_birthweight', 'low_birthweight_black', 'low_birthweight_hispanic',
       'low_birthweight_white', 'adult_smoking', 'adult_obesity',
       'food_environment_index', 'physical_inactivity',
       'access_to_exercise_opportunities', 'excessive_drinking',
       'alcohol_impaired_driving_deaths', 'sexually_transmitted_infections',
       'teen_births', 'teen_births_black', 'teen_births_hispanic',
       'teen_births_white', 'uninsured', 'primary_care_physicians',
       'ratio_of_population_to_primary_care_physicians', 'dentists',
       'ratio_of_population_to_dentists', 'mental_health_providers',
       'ratio_of_population_to_mental_health_providers',
       'preventable_hospit

In [8]:
county_data_new.columns[100:157]

Index(['median_household_income', 'median_household_income_black',
       'median_household_income_hispanic', 'median_household_income_white',
       'children_eligible_for_free_or_reduced_price_lunch',
       'residential_segregation__black_white',
       'residential_segregation__non_white_white', 'homicides',
       'firearm_fatalities', 'homeownership', 'severe_housing_cost_burden',
       'population', '%_below_18_years_of_age', '%_65_and_older',
       '%_non_hispanic_african_american',
       '%_american_indian_and_alaskan_native', '%_asian',
       '%_native_hawaiian_other_pacific_islander', '%_hispanic',
       '%_non_hispanic_white', '%_not_proficient_in_english', '%_females',
       '%_rural', 'communicable_disease',
       'self_inflicted_injury_hospitalizations', 'cancer_incidence',
       'coronary_heart_disease_hospitalizations',
       'cerebrovascular_disease_hospitalizations', 'smoking_during_pregnancy',
       'drug_arrests', 'opioid_hospital_visits',
       'alcohol

In [9]:
county_data_new.to_csv("Data/county_data_clean.csv")