In [1]:
import pandas as pd
import numpy as np

### This notebook builds a per-county dataset for California (location, land area, population)
#### Required input files: zip_codes_states.csv, Land_Area.csv, PEP_2017_PEPANNRES_with_ann.csv

In [2]:
# Render floats with two decimal places whenever pandas displays them.
pd.set_option('display.float_format', '{:.2f}'.format)

In [3]:
def rchop(string, ending):
    """
    Remove a trailing substring from a string, if it is present.

    string: the string to trim
    ending: the suffix to strip from the end of `string`

    output:
    string: `string` without the trailing `ending`; returned unchanged when
            `string` does not end with `ending` or when `ending` is empty
    """
    # Every string "ends with" the empty string, and string[:-0] is
    # string[:0] (i.e. ''), so an empty `ending` must be skipped explicitly
    # or the whole input would be erased.
    if ending and string.endswith(ending):
        return string[:-len(ending)]
    return string

#### Process county location data (mean latitude/longitude per county)

In [4]:
# Load the ZIP-code table and keep only the California rows.
df = pd.read_csv('zip_codes_states.csv')
CA = df[df['state']=='CA']
# Average the numeric columns per county and prefix them with 'mean_'.
# numeric_only=True is required because the frame still carries text columns
# (e.g. 'state'); pandas >= 2.0 raises TypeError on them instead of silently
# dropping them as older versions did.
loc = (
    CA.drop('zip_code', axis=1)
    .groupby('county')
    .mean(numeric_only=True)
    .add_prefix('mean_')
    .reset_index()
)

#### Process county land-area data

In [5]:
# Load the raw land-area table.
area_raw = pd.read_csv('Land_Area.csv')

# Discard the first two rows (selected by their index labels).
area_1 = area_raw.drop(index=area_raw.index[[0, 1]])

# Cut the last four characters off every area name.
# NOTE(review): presumably this removes a trailing state suffix -- confirm against the CSV.
area_1['Areaname'] = area_1['Areaname'].apply(lambda name: name[:-4])

# Remove the 'STCOU' column, then rename 'Areaname' to 'county' so it can
# serve as the merge key.
area_2 = area_1.drop(columns='STCOU')
area_3 = area_2.rename(columns={'Areaname': 'county'})

#### Process county population data (mean of the 2010-2017 annual estimates)

In [6]:
# Load the raw population table, drop its first row, then drop the
# identifier/census-base columns that are not needed.
popu_raw = pd.read_csv('PEP_2017_PEPANNRES_with_ann.csv')
popu_2 = popu_raw.drop(index=popu_raw.index[0])
popu_3 = popu_2.drop(columns=['GEO.id', 'GEO.id2', 'rescen42010', 'resbase42010'])

# Take the column range respop72010..respop72017, convert it to numbers,
# and store the per-row mean as 'popu_mean'.
popu8years = popu_3.loc[:, "respop72010":"respop72017"].apply(pd.to_numeric)
popu_3['popu_mean'] = popu8years.mean(axis='columns')

# Keep only the label and the mean, rename the label column to 'county',
# and strip the ' County, California' suffix from each name.
popu_4 = popu_3.loc[:, ['GEO.display-label', 'popu_mean']]
popu_5 = popu_4.rename(columns={'GEO.display-label': 'county'})
popu_5['county'] = popu_5['county'].apply(rchop, args=(' County, California',))

#### Merge the location, area, and population data into one county table and save it to a file

In [7]:
# Join the three per-county tables on the shared 'county' key
# (merge's default join type is inner, so unmatched counties are dropped).
loc_and_area = loc.merge(area_3, on='county')
county = loc_and_area.merge(popu_5, on='county')

# Population density = mean population divided by land area.
county['popu_density'] = county['popu_mean'].div(county['Land Area (mi^2)'])

In [8]:
# Write the merged county table to disk; with to_csv's defaults the row
# index is written as well.
county.to_csv(path_or_buf='county_data.csv')

In [9]:
# Show the merged county table as this cell's output.
county

Unnamed: 0,county,mean_latitude,mean_longitude,Land Area (mi^2),popu_mean,popu_density
0,Alameda,37.73,-122.1,821.26,1593792.5,1940.67
1,Alpine,38.67,-120.06,743.23,1105.0,1.49
2,Amador,38.45,-120.71,604.26,37383.0,61.87
3,Butte,39.64,-121.6,1677.22,223370.38,133.18
4,Calaveras,38.21,-120.56,1036.92,45092.75,43.49
5,Colusa,39.19,-122.17,1156.32,21445.5,18.55
6,Contra Costa,37.9,-122.01,802.18,1102058.25,1373.83
7,Del Norte,41.82,-124.06,1229.84,27803.12,22.61
8,El Dorado,38.81,-120.5,1791.29,183484.88,102.43
9,Fresno,36.74,-119.7,6017.89,959688.5,159.47
