## OneStar County and TARC mapping

In [1]:
import pandas as pd
import re

In [2]:
## Import Data
# The "Counties" column holds a ", "-separated list of entries shaped like
# "Armstrong (48011)" (county name followed by a numeric code in parentheses).
fileName_input = "TARC_County_Mapping_(TexasDemographicCenter)_2013_09.csv"
tarc = pd.read_csv(fileName_input)

## Turn Counties string to columns
# expand=True yields one column per list position (0, 1, 2, ...); rows with
# fewer entries are padded with missing values, which are removed after the melt.
temp = tarc["Counties"].str.split(pat=", ", expand=True)

## Concat the county columns and drop old Counties column
tarc = pd.concat([tarc, temp], axis="columns")
tarc = tarc.drop(labels="Counties", axis="columns")

## Melt wide format to long format
# Every non-id column (the positional county columns) becomes one row, so the
# result is ordered by list position first, not by COG.
tarc = pd.melt(
    tarc,
    id_vars=["COG ID", "COG Name", "COG Long Name"],
    var_name="County Num of County in COG",
    value_name="County",
)

## Remove NA
# Drop the padding rows introduced by the split above.
mask = ~tarc["County"].isna()
tarc = tarc[mask].reset_index(drop=True)
tarc = tarc.drop("County Num of County in COG", axis="columns")

## Clean up the strings in County column
# Raw-string patterns: "\d" and "\(" are invalid escapes in a normal string
# literal and emit a SyntaxWarning on recent Python versions.
# FIPS is the first run of digits in the entry and is kept as a string.
tarc["FIPS"] = tarc["County"].str.extract(r"(\d+)", expand=False)

# Fail with a readable message instead of an AttributeError on None when an
# entry carries no numeric code.
missing_fips = tarc.loc[tarc["FIPS"].isna(), "County"]
if not missing_fips.empty:
    raise ValueError(
        "No FIPS code found in county entries: "
        + ", ".join(repr(entry) for entry in missing_fips)
    )

# Strip the " (12345)" suffix for the short name; swap it for " County" for
# the long name.
tarc["County Name"] = tarc["County"].str.replace(r" \(\d+\)", "", regex=True)
tarc["County Long Name"] = tarc["County"].str.replace(r" \(\d+\)", " County", regex=True)

## Preview dataframe
print(tarc.shape)  # Texas has 254 counties
display(tarc)
display(tarc.tail())


(254, 7)


Unnamed: 0,COG ID,COG Name,COG Long Name,County,FIPS,County Name,County Long Name
0,1,Panhandle,Panhandle Regional Planning Commission,Armstrong (48011),48011,Armstrong,Armstrong County
1,2,South Plains,South Plains Association of Governments,Bailey (48017),48017,Bailey,Bailey County
2,3,Nortex,Nortex Regional Planning Commission,Archer (48009),48009,Archer,Archer County
3,4,North Central Texas,North Central Texas Council of Governments,Collin (48085),48085,Collin,Collin County
4,5,Ark-Tex,Ark-Tex Council of Governments,Bowie (48037),48037,Bowie,Bowie County
...,...,...,...,...,...,...,...
249,1,Panhandle,Panhandle Regional Planning Commission,Randall (48381),48381,Randall,Randall County
250,1,Panhandle,Panhandle Regional Planning Commission,Roberts (48393),48393,Roberts,Roberts County
251,1,Panhandle,Panhandle Regional Planning Commission,Sherman (48421),48421,Sherman,Sherman County
252,1,Panhandle,Panhandle Regional Planning Commission,Swisher (48437),48437,Swisher,Swisher County


Unnamed: 0,COG ID,COG Name,COG Long Name,County,FIPS,County Name,County Long Name
249,1,Panhandle,Panhandle Regional Planning Commission,Randall (48381),48381,Randall,Randall County
250,1,Panhandle,Panhandle Regional Planning Commission,Roberts (48393),48393,Roberts,Roberts County
251,1,Panhandle,Panhandle Regional Planning Commission,Sherman (48421),48421,Sherman,Sherman County
252,1,Panhandle,Panhandle Regional Planning Commission,Swisher (48437),48437,Swisher,Swisher County
253,1,Panhandle,Panhandle Regional Planning Commission,Wheeler (48483),48483,Wheeler,Wheeler County


In [3]:
## Export to csv
from datetime import datetime as dt

# A timestamp suffix gives every run its own output file.
fileName_suffix = dt.now().strftime("%Y%m%d_%H%M%S")

# Raw, end-anchored pattern: only the trailing ".csv" extension is replaced,
# and the "\." escape no longer relies on an invalid string-literal escape.
fileName = re.sub(
    r"\.csv$",
    "_(LONG_FORMAT)_" + fileName_suffix + ".csv",
    fileName_input,
)

# If the input name does not end in ".csv" the substitution is a no-op and
# writing would overwrite the source data; refuse instead.
if fileName == fileName_input:
    raise ValueError(
        "Input file name does not end in '.csv'; refusing to overwrite "
        + repr(fileName_input)
    )

tarc.to_csv(fileName, index=False)