In [3]:
import pandas as pd

In [4]:
# Function to load data and extract unique city-state pairs
def load_and_extract(file_name):
    """Read a CSV file and return its distinct (city, state) rows.

    Parameters
    ----------
    file_name : str or path-like
        Location of the CSV file; passed straight to ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The 'city' and 'state' columns of the file with duplicate rows
        dropped. When the file is missing, is completely empty, or lacks
        either column, an empty frame with those two columns is returned
        and a message naming the file is printed.
    """
    try:
        df = pd.read_csv(file_name)
        return df[['city', 'state']].drop_duplicates()
    except FileNotFoundError:
        # Report the miss: a silently skipped dataset would otherwise go
        # unnoticed in the combined result.
        print(f"FileNotFoundError: '{file_name}' was not found; no city-state pairs loaded from it.")
        return pd.DataFrame(columns=['city', 'state'])
    except pd.errors.EmptyDataError:
        # read_csv raises this for a file with no header/data at all; treat
        # it like the other recoverable failures instead of crashing.
        print(f"EmptyDataError: '{file_name}' is empty; no city-state pairs loaded from it.")
        return pd.DataFrame(columns=['city', 'state'])
    except KeyError:
        print(f"KeyError: Check if '{file_name}' contains 'city' and 'state' columns.")
        return pd.DataFrame(columns=['city', 'state'])

# Pull the distinct city-state pairs out of every cleaned dataset
source_files = (
    'clean-companies.csv',
    'clean-real-estate-listings.csv',
    'clean-fortune-1000.csv',
)
companies_unique, real_estate_unique, fortune_1000_unique = [
    load_and_extract(csv_name) for csv_name in source_files
]

# Stack the per-dataset frames into one, renumbering the index from zero
per_dataset_pairs = [companies_unique, real_estate_unique, fortune_1000_unique]
combined_city_state = pd.concat(per_dataset_pairs, ignore_index=True)

# A pair can appear in more than one dataset, so de-duplicate once more
unique_city_state = combined_city_state.drop_duplicates()

In [5]:
# Order rows alphabetically by state, then by city within each state
sort_keys = ['state', 'city']
unique_city_state = unique_city_state.sort_values(by=sort_keys)

# Show the first few rows of the sorted result as a quick sanity check
preview_rows = unique_city_state.head()
print(preview_rows)

             city    state
10291   abbeville  alabama
9418   adamsville  alabama
19477     addison  alabama
20125       adger  alabama
19682       akron  alabama


In [6]:
# Write the sorted, de-duplicated pairs to disk without the DataFrame index
output_path = 'importable-city.csv'
unique_city_state.to_csv(output_path, index=False)