In [88]:
import pandas as pd
from pathlib import Path

In [89]:
# Use pathlib to set the relative file path
file_path = Path(r'..\Data\national_cousub2020.txt')

# Read the TSV file into a DataFrame
original_df = pd.read_csv(file_path, sep='|', dtype=str)  # Use '|' as a delimiter for TSV

# Display the first few rows of the DataFrame
original_df

Unnamed: 0,STATE,STATEFP,COUNTYFP,COUNTYNAME,COUSUBFP,COUSUBNS,COUSUBNAME,CLASSFP,FUNCSTAT
0,AL,01,001,Autauga County,90171,00161593,Autaugaville CCD,Z5,S
1,AL,01,001,Autauga County,90315,00165647,Billingsley CCD,Z5,S
2,AL,01,001,Autauga County,92106,00165648,Marbury CCD,Z5,S
3,AL,01,001,Autauga County,92628,00165649,Prattville CCD,Z5,S
4,AL,01,003,Baldwin County,90207,00161594,Bay Minette CCD,Z5,S
...,...,...,...,...,...,...,...,...,...
36635,VI,78,030,St. Thomas Island,61300,01939766,Northside subdistrict,Z5,N
36636,VI,78,030,St. Thomas Island,74800,01939770,Southside subdistrict,Z5,N
36637,VI,78,030,St. Thomas Island,78400,01939772,Tutu subdistrict,Z5,N
36638,VI,78,030,St. Thomas Island,82000,01939773,Water Island subdistrict,Z5,N


In [90]:
new_df = original_df[['STATEFP', 'COUNTYFP', 'COUNTYNAME', 'STATE']]

new_df.head()

Unnamed: 0,STATEFP,COUNTYFP,COUNTYNAME,STATE
0,1,1,Autauga County,AL
1,1,1,Autauga County,AL
2,1,1,Autauga County,AL
3,1,1,Autauga County,AL
4,1,3,Baldwin County,AL


In [100]:
# Create a new column with concatenated values
new_df['FIPS'] = new_df['STATEFP'] + new_df['COUNTYFP']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['FIPS'] = new_df['STATEFP'] + new_df['COUNTYFP']


In [102]:
# Create an explicit copy first
new_df = original_df[['STATEFP', 'COUNTYFP', 'COUNTYNAME', 'STATE']].copy()

# Now concatenate the columns
new_df['FIPS'] = new_df['STATEFP'].astype(str) + new_df['COUNTYFP'].astype(str)

n

Unnamed: 0,STATEFP,COUNTYFP,COUNTYNAME,STATE,FIPS
0,01,001,Autauga County,AL,01001
1,01,001,Autauga County,AL,01001
2,01,001,Autauga County,AL,01001
3,01,001,Autauga County,AL,01001
4,01,003,Baldwin County,AL,01003
...,...,...,...,...,...
36635,78,030,St. Thomas Island,VI,78030
36636,78,030,St. Thomas Island,VI,78030
36637,78,030,St. Thomas Island,VI,78030
36638,78,030,St. Thomas Island,VI,78030


In [105]:
# Drop duplicates and reset index
fips_df = new_df[['FIPS', 'COUNTYNAME', 'STATE']].drop_duplicates(keep='first').reset_index(drop=True)

In [108]:
fips_df

Unnamed: 0,FIPS,COUNTYNAME,STATE
0,01001,Autauga County,AL
1,01003,Baldwin County,AL
2,01005,Barbour County,AL
3,01007,Bibb County,AL
4,01009,Blount County,AL
...,...,...,...
3230,72153,Yauco Municipio,PR
3231,74300,Midway Islands,UM
3232,78010,St. Croix Island,VI
3233,78020,St. John Island,VI


In [109]:
fips_df.to_csv('fips_data.csv', index=False)

In [110]:
fips_df['FIPS'].unique()

array(['01001', '01003', '01005', ..., '78010', '78020', '78030'],
      dtype=object)

In [92]:
#Extract the the state date
state_df = original_df[['STATEFP','STATE']]

#Display the dataframe
state_df

Unnamed: 0,STATEFP,STATE
0,01,AL
1,01,AL
2,01,AL
3,01,AL
4,01,AL
...,...,...
36635,78,VI
36636,78,VI
36637,78,VI
36638,78,VI


In [93]:
state_df['STATE'].unique()

array(['AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'DC', 'FL', 'GA',
       'HI', 'ID', 'IL', 'IN', 'IA', 'KS', 'KY', 'LA', 'ME', 'MD', 'MA',
       'MI', 'MN', 'MS', 'MO', 'MT', 'NE', 'NV', 'NH', 'NJ', 'NM', 'NY',
       'NC', 'ND', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC', 'SD', 'TN', 'TX',
       'UT', 'VT', 'VA', 'WA', 'WV', 'WI', 'WY', 'AS', 'GU', 'MP', 'PR',
       'UM', 'VI'], dtype=object)

In [94]:
original_df.columns

Index(['STATE', 'STATEFP', 'COUNTYFP', 'COUNTYNAME', 'COUSUBFP', 'COUSUBNS',
       'COUSUBNAME', 'CLASSFP', 'FUNCSTAT'],
      dtype='object')

In [95]:
county_df = original_df[['COUNTYFP', 'COUNTYNAME', 'STATE']]

county_df

Unnamed: 0,COUNTYFP,COUNTYNAME,STATE
0,001,Autauga County,AL
1,001,Autauga County,AL
2,001,Autauga County,AL
3,001,Autauga County,AL
4,003,Baldwin County,AL
...,...,...,...
36635,030,St. Thomas Island,VI
36636,030,St. Thomas Island,VI
36637,030,St. Thomas Island,VI
36638,030,St. Thomas Island,VI


In [96]:
# cleaned_county_df = county_df.drop_duplicates(keep='first')

# cleaned_county_df.to_csv('cleaned_county.csv')

In [97]:
# cleaned_state_df = state_df.drop_duplicates(keep='first')

# cleaned_state_df.to_csv('cleaned_state.csv')