In [1]:
import pandas as pd

### Urban County Per State

Identify one county per state as the main metro area: the county with the highest sum of the following two measures:
1. Population Density
2. Housing Density

In [2]:
# import the data
# COUNTY is forced to text so the codes are kept exactly as stored in the workbook.
ua_county_path = './Input_Data/2020_UA_COUNTY.xlsx'
county_df = pd.read_excel(ua_county_path, dtype={"COUNTY": str})

In [3]:
# clean the dataset
columns = ['STATE', 'COUNTY', 'STATE_NAME', 'COUNTY_NAME', 'POPDEN_COU', 'HOUDEN_COU']
county_clean_df = county_df[columns].copy()

# Build the combined county FIPS code (state code followed by county code) as an int.
# Both parts are zero-padded to the standard 2-digit / 3-digit widths first, so a
# county code that lost its leading zeros (e.g. "1" instead of "001") cannot
# silently produce a wrong combined code.
state_code = county_clean_df['STATE'].astype(str).str.zfill(2)
county_code = county_clean_df['COUNTY'].astype(str).str.zfill(3)
county_clean_df['COUNTY_FIPS_CODE'] = (state_code + county_code).astype(int)

# Remove the literal " *" marker from county names (regex=False, so "*" is not a
# wildcard) and trim surrounding whitespace.
county_clean_df['COUNTY_NAME'] = county_clean_df['COUNTY_NAME'].str.replace(' *', '', regex=False).str.strip()

# Drop Alaska, Hawaii and the territories.
# NOTE(review): these strings must match the STATE_NAME spelling used in the
# workbook exactly, otherwise the rows are silently kept - confirm against the data.
values_to_remove = ['Alaska', 'Hawaii', 'American Samoa', 'Guam', 'Commonwealth of the Northern Marianas', 'Puerto Rico', 'US Virgin Islands']
# .copy() makes the filtered frame independent, so later column assignments on it
# do not raise SettingWithCopyWarning.
county_clean_df = county_clean_df[~county_clean_df['STATE_NAME'].isin(values_to_remove)].copy()

In [4]:
# find top urban center per state
# A county is flagged when its population density plus housing density equals the
# maximum of that sum within its state. Counties that tie for the maximum are all
# flagged, and a missing density compares as False.
combined_density = county_clean_df['POPDEN_COU'] + county_clean_df['HOUDEN_COU']
state_max_density = combined_density.groupby(county_clean_df['STATE']).transform('max')

# assign() returns a new frame instead of writing into the filtered frame in place,
# so no temporary column has to be created and dropped, and re-running the cell
# gives the same result.
county_clean_df = county_clean_df.assign(TOP_METRO=combined_density.eq(state_max_density))

### County Adjacency

Identify the counties that lie 1 to 5 adjacency steps (levels) away from each top metro county defined above.


In [5]:
# graph class
class Graph:
    """Undirected graph stored as an adjacency list.

    ``adjacency_list`` maps every node to a list of its neighbours in the
    order the edges were added. Self-loops and duplicate edges are never
    stored.
    """

    def __init__(self):
        # node -> list of neighbouring nodes
        self.adjacency_list = {}

    def add_node(self, node):
        """Register ``node`` with no neighbours; does nothing if it already exists."""
        if node not in self.adjacency_list:
            self.adjacency_list[node] = []

    def add_edge(self, node1, node2):
        """Connect ``node1`` and ``node2`` in both directions.

        Missing nodes are created. A self-loop (``node1 == node2``) is ignored
        entirely, and an edge that already exists is not added twice.
        """
        if node1 == node2:
            return

        self.add_node(node1)
        self.add_node(node2)

        if node2 not in self.adjacency_list[node1]:
            self.adjacency_list[node1].append(node2)
        if node1 not in self.adjacency_list[node2]:
            self.adjacency_list[node2].append(node1)

    def remove_node(self, node):
        """Delete ``node`` and every edge touching it; does nothing if it is absent."""
        for neighbor in self.adjacency_list.pop(node, []):
            neighbor_links = self.adjacency_list.get(neighbor, [])
            if node in neighbor_links:
                neighbor_links.remove(node)

    def remove_edge(self, node1, node2):
        """Delete the edge between ``node1`` and ``node2``.

        Does nothing when either node is unknown or when the two nodes are not
        connected, so removing the same edge twice is safe.
        """
        links1 = self.adjacency_list.get(node1)
        links2 = self.adjacency_list.get(node2)
        if links1 is None or links2 is None:
            return
        # Guard each removal: list.remove raises ValueError on a missing item.
        if node2 in links1:
            links1.remove(node2)
        if node1 in links2:
            links2.remove(node1)

    def get_neighbors(self, node):
        """Return the neighbours of ``node``, or an empty list for an unknown node.

        For a known node this is the graph's own internal list, so callers
        should treat it as read-only.
        """
        return self.adjacency_list.get(node, [])

    def get_nodes(self):
        """Return a new list of all nodes in insertion order."""
        return list(self.adjacency_list.keys())

    def display(self):
        """Print one ``node: neighbours`` line per node."""
        for node, neighbors in self.adjacency_list.items():
            print(f"{node}: {neighbors}")


In [6]:
# building the graph
county_adjacency_df = pd.read_csv('./Input_Data/county_adjacency2024.txt', sep="|")

graph = Graph()
edges = zip(county_adjacency_df['County GEOID'], county_adjacency_df['Neighbor GEOID'])
for county_fips, neighbor_fips in edges:
    graph.add_edge(county_fips, neighbor_fips)

# Largest number of adjacency steps to explore around each top metro county.
MAX_NEIGHBOUR_LEVEL = 5


def collect_neighbour_levels(graph, start_fips, max_level=MAX_NEIGHBOUR_LEVEL):
    """Breadth-first walk outwards from ``start_fips``, one level at a time.

    Args:
        graph: object exposing ``get_neighbors(node)``.
        start_fips: code of the county the walk starts from.
        max_level: number of adjacency steps to explore.

    Returns:
        A list of dicts with keys ``county_fips`` (always ``start_fips``),
        ``neighbour_level`` and ``neighbour_county_fips``. The first row is
        the start county itself at level 0; every other county appears once,
        at the first level on which it is reached.
    """
    rows = [{
        'county_fips': start_fips,
        'neighbour_level': 0,
        'neighbour_county_fips': start_fips
    }]
    visited = {start_fips}  # counties already assigned a level
    frontier = [start_fips]

    for level in range(1, max_level + 1):
        next_frontier = []
        for county in frontier:
            for neighbor in graph.get_neighbors(county):
                if neighbor in visited:
                    continue
                visited.add(neighbor)
                next_frontier.append(neighbor)
                rows.append({
                    'county_fips': start_fips,
                    'neighbour_level': level,
                    'neighbour_county_fips': neighbor
                })
        if not next_frontier:
            # Nothing new was reached, so deeper levels would be empty too.
            break
        frontier = next_frontier

    return rows


# Step 1: Get the list of FIPS codes where TOP_METRO is True
top_metro_fips = county_clean_df.loc[county_clean_df['TOP_METRO'], 'COUNTY_FIPS_CODE'].tolist()

# Steps 2-3: Collect the level 0..MAX_NEIGHBOUR_LEVEL rows for every top metro county.
# NOTE(review): a code that does not occur in the adjacency file has no neighbours
# in the graph and therefore yields only its level-0 row - confirm that both input
# files use the same county definitions.
results = []
for fips in top_metro_fips:
    results.extend(collect_neighbour_levels(graph, fips))

In [7]:
# Step 4: Create a DataFrame from the results
county_info = county_clean_df[['COUNTY_FIPS_CODE', 'STATE_NAME', 'COUNTY_NAME', 'TOP_METRO']]

# The same lookup table is joined twice: once to describe the top metro county
# and once to describe the neighbouring county. Renaming up front gives each
# join its final column names.
metro_lookup = county_info.rename(columns={
    'STATE_NAME': 'state_name',
    'COUNTY_NAME': 'county_name',
    'TOP_METRO': 'top_metro'
})
neighbour_lookup = county_info.rename(columns={
    'STATE_NAME': 'neighbour_state_name',
    'COUNTY_NAME': 'neighbour_county_name',
    'TOP_METRO': 'neighbour_top_metro'
})

output_columns = [
    'county_fips', 'county_name', 'state_name', 'top_metro',
    'neighbour_level',
    'neighbour_county_fips', 'neighbour_county_name', 'neighbour_state_name', 'neighbour_top_metro',
]

# Left joins keep every result row even when a code has no match in county_info.
adjacency_df = (
    pd.DataFrame(results)
    .merge(metro_lookup, how='left', left_on='county_fips', right_on='COUNTY_FIPS_CODE')
    .drop(columns=['COUNTY_FIPS_CODE'])
    .merge(neighbour_lookup, how='left', left_on='neighbour_county_fips', right_on='COUNTY_FIPS_CODE')
    .drop(columns=['COUNTY_FIPS_CODE'])
)[output_columns]

In [8]:
# Write both tables to Excel workbooks (default options, so the index is written too).
adjacency_path = './Tableau/Datasources/adjacency_df.xlsx'
adjacency_df.to_excel(adjacency_path)

county_clean_path = './Input_Data/county_clean_df.xlsx'
county_clean_df.to_excel(county_clean_path)