In [1]:
#Import the pandas package for data wrangling
import pandas as pd

The cell below reads two CSV files and writes one. Before running it, change every file path so that it matches the location of the files on your computer.

In [2]:
# Load the data file that carries the RCDTS code for each row
# (here, a file with Illinois 2012 data).
# RCDTS is an identifier, not a quantity, so it is read as text (dtype = str).
# Letting pandas infer a numeric type drops leading zeros and, when a value is
# missing, turns the whole column into floats - both of which shift the
# character positions used later to extract the county code.
# You can check with df.head(): every RCDTS should be 15 characters long.
# If the CSV itself was saved without leading zeros, they are padded back
# when the county code is extracted further down.
df = pd.read_csv(
    'C:/Data_various/Illinois_hesitancy/IL_NME_2012.csv',
    dtype = {'RCDTS': str},
)

# Load the key file used to match RCDTS codes to county codes.
# County_code is also read as text so that it is never converted to a float;
# codes that lost their leading zero in the CSV are padded further down.
IL_codes = pd.read_csv(
    'C:/Data_various/Illinois_hesitancy/key_codes.csv',
    dtype = {'County_code': str},
)

# The county part of an RCDTS is always 3 characters wide, but a CSV does not
# keep leading zeros on numbers, so shorter codes can come back as '1' or '12'.
# Convert the codes to text and right-justify them in a 3-character field
# filled with '0', so that every code is 3 characters long ('1' -> '001').
# The result overwrites the County_code column.
IL_codes['County_code'] = (
    IL_codes['County_code']
    .astype(str)
    .str.rjust(3, fillchar = '0')
)


# Build a 3-character county code from the RCDTS column.
# RCDTS layout: Region(XX)-County(XXX)-District(XXXX)-Type(XX)-School(XXXX)
# Steps:
#   1. turn the code into a string, which is easier to manipulate;
#   2. left-fill it with '0' up to 15 characters to restore lost leading zeros;
#   3. keep the characters at positions 2, 3 and 4 (counting from 0), i.e. the
#      3rd to 5th characters, which hold the county.
# The result is stored in a new 'County_code' column of df.
rcdts_text = df['RCDTS'].astype(str).str.rjust(15, fillchar = '0')
df['County_code'] = rcdts_text.str[2:5]

# Join the two tables on the 'County_code' column they now have in common.
# how = 'left' keeps every row of the left dataframe (df) and attaches the
# matching entry from the right dataframe (IL_codes); rows of df with no match
# are kept, with missing values in the columns coming from IL_codes.
# The result is a new dataframe in which each row carries the information from
# the key file for the county code originally encoded in its RCDTS.
IL_with_county_name = df.merge(IL_codes, how = 'left', on = 'County_code')

# Write the merged dataframe to a CSV file with the DataFrame.to_csv method.
# index = False leaves the dataframe's row index out of the exported file.
IL_with_county_name.to_csv(
    path_or_buf = 'C:/Data_various/Illinois_hesitancy/IL_w_county.csv',
    index = False,
)