In [1]:
import pandas as pd
import re

In [2]:
county_data = pd.read_csv("Data/county_data.csv", skiprows = [1])

In [14]:
county_data.columns[100:150]

Index(['Median household income raw value', 'Median household income (Black)',
       'Median household income (Hispanic)', 'Median household income (White)',
       'Children eligible for free or reduced price lunch raw value',
       'Residential segregation - Black/White raw value',
       'Residential segregation - non-white/white raw value',
       'Homicides raw value', 'Firearm fatalities raw value',
       'Homeownership raw value', 'Severe housing cost burden raw value',
       'Population raw value', '% below 18 years of age raw value',
       '% 65 and older raw value', '% Non-Hispanic African American raw value',
       '% American Indian and Alaskan Native raw value', '% Asian raw value',
       '% Native Hawaiian/Other Pacific Islander raw value',
       '% Hispanic raw value', '% Non-Hispanic white raw value',
       '% not proficient in English raw value', '% Females raw value',
       '% Rural raw value', 'Communicable disease raw value',
       'Self-inflicted injury 

## Removing Columns 

In [4]:
def del_col(df, suffixes):
    for suffix in suffixes:
        for col in df.columns:
            if suffix in col:
                del df[col]
    return df

In [5]:
county_data_new = del_col(county_data, ["numerator", "denominator", "CI low", "CI high", "County Ranked", "Release Year"])

## Renaming Columns

In [6]:
column_names = {}
for col_name in county_data_new.columns:
    col_name_clean = re.sub(r"[()\"#/@;:<>{}`+=~|.!?,]", " ", col_name)
    col_name_clean = col_name_clean.replace("raw value","").replace(" ","_").lower()
    col_name_clean = col_name_clean.replace("-","_").replace("__","_").replace("___","_")
    col_name_clean = col_name_clean.rstrip("_")
    column_names[col_name] = col_name_clean
county_data_new = county_data_new.rename(columns=column_names)

In [7]:
county_data_new.columns[0:100]

Index(['state_fips_code', 'county_fips_code', '5_digit_fips_code',
       'state_abbreviation', 'name', 'premature_death',
       'premature_death_black', 'premature_death_hispanic',
       'premature_death_white', 'poor_or_fair_health',
       'poor_physical_health_days', 'poor_mental_health_days',
       'low_birthweight', 'low_birthweight_black', 'low_birthweight_hispanic',
       'low_birthweight_white', 'adult_smoking', 'adult_obesity',
       'food_environment_index', 'physical_inactivity',
       'access_to_exercise_opportunities', 'excessive_drinking',
       'alcohol_impaired_driving_deaths', 'sexually_transmitted_infections',
       'teen_births', 'teen_births_black', 'teen_births_hispanic',
       'teen_births_white', 'uninsured', 'primary_care_physicians',
       'ratio_of_population_to_primary_care_physicians', 'dentists',
       'ratio_of_population_to_dentists', 'mental_health_providers',
       'ratio_of_population_to_mental_health_providers',
       'preventable_hospit

In [8]:
county_data_new.columns[100:157]

Index(['median_household_income', 'median_household_income_black',
       'median_household_income_hispanic', 'median_household_income_white',
       'children_eligible_for_free_or_reduced_price_lunch',
       'residential_segregation__black_white',
       'residential_segregation__non_white_white', 'homicides',
       'firearm_fatalities', 'homeownership', 'severe_housing_cost_burden',
       'population', '%_below_18_years_of_age', '%_65_and_older',
       '%_non_hispanic_african_american',
       '%_american_indian_and_alaskan_native', '%_asian',
       '%_native_hawaiian_other_pacific_islander', '%_hispanic',
       '%_non_hispanic_white', '%_not_proficient_in_english', '%_females',
       '%_rural', 'communicable_disease',
       'self_inflicted_injury_hospitalizations', 'cancer_incidence',
       'coronary_heart_disease_hospitalizations',
       'cerebrovascular_disease_hospitalizations', 'smoking_during_pregnancy',
       'drug_arrests', 'opioid_hospital_visits',
       'alcohol

In [9]:
county_data_new.to_csv("Data/county_data_clean.csv")

In [10]:
county_data_new

Unnamed: 0,state_fips_code,county_fips_code,5_digit_fips_code,state_abbreviation,name,premature_death,premature_death_black,premature_death_hispanic,premature_death_white,poor_or_fair_health,...,male_population_18_44,male_population_45_64,male_population_65,total_male_population,female_population_0_17,female_population_18_44,female_population_45_64,female_population_65,total_female_population,population_growth
0,0,0,0,US,United States,6900.630354,,,,,...,,,,,,,,,,
1,1,0,1000,AL,Alabama,9917.232898,,,,0.214024,...,,,,,,,,,,
2,1,1,1001,AL,Autauga County,8824.057123,10471.252986,,8706.658832,0.184111,...,,,,,,,,,,
3,1,3,1003,AL,Baldwin County,7224.632160,10042.472874,3086.605695,7277.780727,0.180605,...,,,,,,,,,,
4,1,5,1005,AL,Barbour County,9586.165037,11332.562909,,7309.636719,0.257734,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3189,56,37,56037,WY,Sweetwater County,7497.439952,,6413.215910,7834.050381,0.153772,...,,,,,,,,,,
3190,56,39,56039,WY,Teton County,3786.128226,,,,0.121817,...,,,,,,,,,,
3191,56,41,56041,WY,Uinta County,7790.302043,,,,0.158858,...,,,,,,,,,,
3192,56,43,56043,WY,Washakie County,5504.650970,,,,0.161261,...,,,,,,,,,,
