In [1]:
import os
from typing import List, Dict

import pandas as pd
import requests

In [2]:
# Lookup table: upper-case state name (plus District of Columbia and
# Puerto Rico) -> two-digit FIPS code, kept as a string so leading zeros survive.
state_fips = {
    "ALABAMA": "01",
    "ALASKA": "02",
    "ARIZONA": "04",
    "ARKANSAS": "05",
    "CALIFORNIA": "06",
    "COLORADO": "08",
    "CONNECTICUT": "09",
    "DELAWARE": "10",
    "DISTRICT OF COLUMBIA": "11",
    "FLORIDA": "12",
    "GEORGIA": "13",
    "HAWAII": "15",
    "IDAHO": "16",
    "ILLINOIS": "17",
    "INDIANA": "18",
    "IOWA": "19",
    "KANSAS": "20",
    "KENTUCKY": "21",
    "LOUISIANA": "22",
    "MAINE": "23",
    "MARYLAND": "24",
    "MASSACHUSETTS": "25",
    "MICHIGAN": "26",
    "MINNESOTA": "27",
    "MISSISSIPPI": "28",
    "MISSOURI": "29",
    "MONTANA": "30",
    "NEBRASKA": "31",
    "NEVADA": "32",
    "NEW HAMPSHIRE": "33",
    "NEW JERSEY": "34",
    "NEW MEXICO": "35",
    "NEW YORK": "36",
    "NORTH CAROLINA": "37",
    "NORTH DAKOTA": "38",
    "OHIO": "39",
    "OKLAHOMA": "40",
    "OREGON": "41",
    "PENNSYLVANIA": "42",
    "RHODE ISLAND": "44",
    "SOUTH CAROLINA": "45",
    "SOUTH DAKOTA": "46",
    "TENNESSEE": "47",
    "TEXAS": "48",
    "UTAH": "49",
    "VERMONT": "50",
    "VIRGINIA": "51",
    "WASHINGTON": "53",
    "WEST VIRGINIA": "54",
    "WISCONSIN": "55",
    "WYOMING": "56",
    "PUERTO RICO": "72",
}

In [3]:
class CensusDataRetriever:
    """Small client for the Census Bureau data API (2022 ACS 5-year dataset).

    Every request goes through ``_get_json`` so that all endpoints share the
    same timeout and the same HTTP status check.
    """

    def __init__(self, api_key: str, timeout: float = 30.0):
        """
        Parameters:
        api_key: Census API key, sent as the ``key`` query parameter
        timeout: Seconds to wait on the server before giving up
        """
        self.api_key = api_key
        self.base_url = "https://api.census.gov/data"
        self.year = "2022"  # ACS 5-year vintage to query
        self.dataset = "acs/acs5"
        self.timeout = timeout

    def _get_json(self, url: str, params: Dict = None):
        """Issue a GET request and return the decoded JSON body.

        Raises:
        Exception: if the response status code is not 200
        """
        # requests has no default timeout; without one a stalled connection
        # would block the notebook kernel indefinitely.
        response = requests.get(url, params=params, timeout=self.timeout)
        if response.status_code != 200:
            raise Exception(f"API request failed: {response.text}")
        return response.json()

    def get_variable_groups(self) -> Dict:
        """Retrieve available variable groups for reference"""
        url = f"{self.base_url}/{self.year}/{self.dataset}/groups.json"
        return self._get_json(url)

    def get_place_data(self, state_fips: str, variables: List[str]) -> pd.DataFrame:
        """
        Retrieve data for all places (cities) in a state

        Parameters:
        state_fips: Two-digit state FIPS code
        variables: List of Census variable codes

        Returns a DataFrame whose columns are taken from the first row of the
        API response and whose rows are the remaining response rows.
        """
        url = f"{self.base_url}/{self.year}/{self.dataset}"
        params = {
            "get": "NAME," + ",".join(variables),
            "for": "place:*",
            "in": f"state:{state_fips}",
            "key": self.api_key,
        }
        data = self._get_json(url, params)
        # Row 0 of the payload is the header; everything after it is data.
        return pd.DataFrame(data[1:], columns=data[0])


In [50]:
class CensusDataRetriever:
    """Small client for the Census Bureau data API (2022 ACS 5-year dataset).

    This definition replaces any earlier ``CensusDataRetriever`` in the kernel,
    so it also carries ``get_place_data``, which a later cell still calls.
    Every request goes through ``_get_json`` so that all endpoints share the
    same timeout and the same HTTP status check.
    """

    # Geography name the API uses for Core Based Statistical Areas (CBSAs).
    _CBSA_GEOGRAPHY = "metropolitan statistical area/micropolitan statistical area"

    def __init__(self, api_key: str, timeout: float = 30.0):
        """
        Parameters:
        api_key: Census API key, sent as the ``key`` query parameter
        timeout: Seconds to wait on the server before giving up
        """
        self.api_key = api_key
        self.base_url = "https://api.census.gov/data"
        self.year = "2022"  # ACS 5-year vintage to query
        self.dataset = "acs/acs5"
        self.timeout = timeout

    def _get_json(self, url: str, params: Dict = None):
        """Issue a GET request and return the decoded JSON body.

        Raises:
        Exception: if the response status code is not 200
        """
        # requests has no default timeout; without one a stalled connection
        # would block the notebook kernel indefinitely.
        response = requests.get(url, params=params, timeout=self.timeout)
        if response.status_code != 200:
            raise Exception(f"API request failed: {response.text}")
        return response.json()

    def _get_table(self, params: Dict) -> pd.DataFrame:
        """Query the dataset endpoint and turn the row-list payload into a DataFrame."""
        url = f"{self.base_url}/{self.year}/{self.dataset}"
        data = self._get_json(url, params)
        # Row 0 of the payload is the header; everything after it is data.
        return pd.DataFrame(data[1:], columns=data[0])

    def get_variable_groups(self) -> Dict:
        """Retrieve available variable groups for reference"""
        url = f"{self.base_url}/{self.year}/{self.dataset}/groups.json"
        return self._get_json(url)

    def get_place_data(self, state_fips: str, variables: List[str]) -> pd.DataFrame:
        """
        Retrieve data for all places (cities) in a state

        Parameters:
        state_fips: Two-digit state FIPS code
        variables: List of Census variable codes
        """
        return self._get_table({
            "get": "NAME," + ",".join(variables),
            "for": "place:*",
            "in": f"state:{state_fips}",
            "key": self.api_key,
        })

    def get_metro_data(
        self,
        variables: List[str],
        min_population: int = 50000,
        population_var: str = "B01001_001E",
        metro_only: bool = False,
    ) -> pd.DataFrame:
        """
        Retrieve data for Core Based Statistical Areas (CBSAs) above a size threshold

        The request covers both metropolitan and micropolitan areas. By default
        rows are kept when their total population is at least ``min_population``;
        that threshold alone does NOT remove micropolitan areas (a Micro Area
        can exceed 50,000 residents). Pass ``metro_only=True`` to keep only rows
        whose NAME ends with "Metro Area".

        Parameters:
        variables: List of Census variable codes
        min_population: Minimum value of ``population_var`` for a row to be kept
        population_var: Variable code used as the population measure
        metro_only: If True, additionally drop rows not named "... Metro Area"

        Returns a copy of the filtered rows with ``population_var`` converted
        to a numeric dtype; all other columns are returned as received.
        """
        requested = list(variables)
        if population_var not in requested:
            # The filter needs this column; request it rather than failing
            # with a KeyError after the network round trip.
            requested.append(population_var)

        df = self._get_table({
            "get": "NAME," + ",".join(requested),
            "for": f"{self._CBSA_GEOGRAPHY}:*",
            "key": self.api_key,
        })

        # Convert population to numeric for filtering
        df[population_var] = pd.to_numeric(df[population_var])
        selected = df[df[population_var] >= min_population]
        if metro_only:
            selected = selected[selected["NAME"].str.endswith("Metro Area")]
        return selected.copy()

    def get_component_places(self, cbsa_code: str, variables: List[str]) -> pd.DataFrame:
        """
        Get all places (cities) within a specific CBSA

        Parameters:
        cbsa_code: The CBSA code for the metropolitan area
        variables: List of Census variable codes
        """
        # NOTE(review): confirm that the ACS 5-year endpoint supports a
        # place-within-CBSA hierarchy; if it does not, this request will be
        # rejected and surface as "API request failed".
        return self._get_table({
            "get": "NAME," + ",".join(variables),
            "for": "place:*",
            "in": f"{self._CBSA_GEOGRAPHY}:{cbsa_code}",
            "key": self.api_key,
        })

In [51]:
# The key is read from the environment so the secret never lives in the notebook.
# NOTE(review): the key that used to be hard-coded here should be treated as
# leaked and rotated.
API_KEY = os.environ.get("CENSUS_API_KEY")
if not API_KEY:
    raise RuntimeError("Set the CENSUS_API_KEY environment variable before running this notebook.")

# Initialize retriever
retriever = CensusDataRetriever(API_KEY)

# Census variable code -> readable column name used throughout the notebook.
# The full variable list is at: https://api.census.gov/data/2022/acs/acs5/variables.html
#
# NOTE(review): in the race-iterated tables (B01001H / B01001D) the `_011E`
# cell appears to be the 35-44 age band rather than 35-39 -- the metro output
# shows male_white_35_39 larger than male_total_35_39. Confirm against the
# variable list before relying on the two `*_35_39` race labels.
column_map = {
    # Sex by age, all races
    "B01001_001E": "total_population",
    # Sex by age, race-iterated tables (H and D suffixes)
    "B01001H_025E": "female_white_30_34",
    "B01001D_025E": "female_asian_30_34",
    "B01001D_010E": "male_asian_30_34",
    "B01001H_010E": "male_white_30_34",
    "B01001H_011E": "male_white_35_39",
    "B01001D_011E": "male_asian_35_39",
    # Sex by age, all races (continued)
    "B01001_036E": "female_total_30_34",
    "B01001_012E": "male_total_30_34",
    "B01001_013E": "male_total_35_39",
    # Sex by marital status by age
    "B12002_101E": "female_never_married_30_34",
    "B12002_102E": "female_never_married_35_39",
    "B12002_008E": "male_never_married_30_34",
    "B12002_009E": "male_never_married_35_39",
}

# Variable codes to request, in the same order as the mapping above.
variables = list(column_map)

In [32]:
# Place-level data for California; the FIPS code is looked up by state name
# instead of repeating it as a bare literal.
num_cols = list(column_map.values())
ca_data = retriever.get_place_data(state_fips["CALIFORNIA"], variables).rename(columns=column_map)
# Convert the renamed measure columns to numeric dtypes.
ca_data[num_cols] = ca_data[num_cols].apply(pd.to_numeric)

In [54]:
# Fetch the CBSA table, give the columns readable names, then convert the
# renamed measure columns to numeric dtypes.
num_cols = list(column_map.values())
metro_data = retriever.get_metro_data(variables).rename(columns=column_map)
metro_data[num_cols] = metro_data[num_cols].apply(pd.to_numeric)

In [55]:
# Preview the first rows of the renamed metro table.
metro_data.head()

Unnamed: 0,NAME,total_population,female_white_30_34,female_asian_30_34,male_asian_30_34,male_white_30_34,male_white_35_39,male_asian_35_39,female_total_30_34,male_total_30_34,male_total_35_39,female_never_married_30_34,female_never_married_35_39,male_never_married_30_34,male_never_married_35_39,metropolitan statistical area/micropolitan statistical area
1,"Aberdeen, WA Micro Area",75672,1553,33,31,1793,3596,97,1982,2494,2396,704,618,1238,883,10140
2,"Abilene, TX Metro Area",176656,3395,132,129,3617,6642,226,5683,6575,6339,1406,935,2641,1816,10180
4,"Adrian, MI Micro Area",99263,2266,0,3,2429,5154,7,2666,2932,3401,794,668,1322,1027,10300
5,"Aguadilla-Isabela, PR Metro Area",308919,535,16,0,335,311,20,9099,8927,7757,5504,3238,6179,4327,10380
6,"Akron, OH Metro Area",700578,16223,1052,1047,17040,32135,1967,21930,22021,20956,8727,4961,10659,6951,10420


In [56]:
# Keep areas with more than 500 Asian women aged 30-34 and more than 500
# never-married women aged 30-34. The copy lets later cells add columns
# without touching metro_data.
populous = metro_data.loc[
    metro_data["female_asian_30_34"].gt(500)
    & metro_data["female_never_married_30_34"].gt(500)
].copy()

In [1]:
def favorable_dating_index(df):
    """Compute the notebook's "favorable dating index" (FDI) for each row of ``df``.

    The index is a ratio of two scaled head counts:

    * numerator: Asian plus white women aged 30-34, multiplied by the
      never-married share of all women aged 30-34;
    * denominator: the four white/Asian male columns (``*_30_34`` and
      ``*_35_39``), multiplied by the never-married share of all men in the
      30-34 and 35-39 columns combined.

    Parameters:
    df: Table (or mapping) providing the renamed columns read below

    Returns the element-wise ratio, one value per row.

    NOTE(review): the two race-specific ``*_35_39`` male columns may actually
    hold a wider age band than their names suggest -- verify the variable
    codes they are mapped from.
    """
    women_in_group = df["female_asian_30_34"] + df["female_white_30_34"]
    women_single_share = df["female_never_married_30_34"] / df["female_total_30_34"]

    men_never_married = df["male_never_married_30_34"] + df["male_never_married_35_39"]
    men_total = df["male_total_30_34"] + df["male_total_35_39"]
    men_single_share = men_never_married / men_total
    men_in_group = (
        df["male_white_30_34"]
        + df["male_white_35_39"]
        + df["male_asian_30_34"]
        + df["male_asian_35_39"]
    )

    return women_in_group * women_single_share / (men_single_share * men_in_group)

In [2]:
# Derived comparison columns for the filtered areas.
populous["fdi"] = favorable_dating_index(populous)
# Ratio of never-married women to never-married men, both aged 30-34.
populous["f_to_m"] = populous["female_never_married_30_34"].div(populous["male_never_married_30_34"])
# Same two groups as an absolute difference (negative means more men than women).
populous["f_surplus"] = populous["female_never_married_30_34"].sub(populous["male_never_married_30_34"])

NameError: name 'populous' is not defined

In [60]:
# Number of areas that passed both cohort-size filters.
len(populous)

124

In [59]:
# Rank the filtered areas by the favorable dating index, highest first (top 20).
populous.sort_values(by='fdi', ascending=False).head(20)

Unnamed: 0,NAME,total_population,female_white_30_34,female_asian_30_34,male_asian_30_34,male_white_30_34,male_white_35_39,male_asian_35_39,female_total_30_34,male_total_30_34,male_total_35_39,female_never_married_30_34,female_never_married_35_39,male_never_married_30_34,male_never_married_35_39,metropolitan statistical area/micropolitan statistical area,fdi,f_to_m,f_surplus
434,"Kapaa, HI Micro Area",73511,654,730,505,578,1419,1166,2225,2200,2518,1239,830,1241,1061,28180,0.227137,0.998388,-2
868,"Urban Honolulu, HI Metro Area",1010100,6860,12746,11449,8670,14526,24354,34667,38550,36251,13477,9305,19297,12642,46520,0.196695,0.698399,-5820
759,"San Jose-Sunnyvale-Santa Clara, CA Metro Area",1981584,17584,33853,38438,22041,39708,62150,74289,87149,79406,26066,14616,42495,23836,41940,0.183726,0.61339,-16429
757,"San Francisco-Oakland-Berkeley, CA Metro Area",4692242,62773,59750,56408,71307,129367,101334,187300,198762,190045,84132,51157,110274,69901,41860,0.16159,0.762936,-26142
377,"Hilo, HI Micro Area",202163,1501,1168,1032,1473,3529,2432,6191,5974,6551,3357,2025,3945,2568,25900,0.143864,0.850951,-588
825,"Stockton, CA Metro Area",779445,6675,5114,5065,7104,14079,9780,26377,27470,28216,10700,7030,14049,9424,44700,0.136602,0.76162,-3349
429,"Kahului-Wailuku-Lahaina, HI Metro Area",164765,1521,1227,1032,1420,3439,2836,5241,5004,6147,2200,1844,2849,2171,27980,0.131099,0.772201,-649
499,"Los Angeles-Long Beach-Anaheim, CA Metro Area",13111917,134243,84179,79849,147736,258624,145281,493667,521972,476114,245914,149857,316446,196928,31080,0.129098,0.777112,-70532
314,"Fresno, CA Metro Area",1008280,8996,5204,5260,9251,17670,7572,36553,38683,36190,14813,9501,18634,13349,23420,0.124192,0.794945,-3821
608,"New York-Newark-Jersey City, NY-NJ-PA Metro Area",19908595,284998,103291,95058,292612,527611,179360,720294,728359,681003,343704,208810,410308,245362,35620,0.096784,0.837673,-66604


In [63]:
# Rank the filtered areas by the never-married female-to-male ratio (ages 30-34), highest first (top 50).
populous.sort_values(by='f_to_m', ascending=False).head(50)

Unnamed: 0,NAME,total_population,female_white_30_34,female_asian_30_34,male_asian_30_34,male_white_30_34,male_white_35_39,male_asian_35_39,female_total_30_34,male_total_30_34,male_total_35_39,female_never_married_30_34,female_never_married_35_39,male_never_married_30_34,male_never_married_35_39,metropolitan statistical area/micropolitan statistical area,fdi,f_to_m,f_surplus
543,"Memphis, TN-MS-AR Metro Area",1335804,17773,1370,1120,17612,34597,2682,47873,44018,40927,23360,17054,22486,13964,32820,0.027814,1.038869,874
603,"New Orleans-Metairie, LA Metro Area",1264357,22271,1657,1242,21752,42598,2492,47762,43663,44721,25636,17366,25559,17791,35380,0.026634,1.003013,77
434,"Kapaa, HI Micro Area",73511,654,730,505,578,1419,1166,2225,2200,2518,1239,830,1241,1061,28180,0.227137,0.998388,-2
345,"Greensboro-High Point, NC Metro Area",775169,12823,1334,1223,12339,23916,2458,25860,23226,22466,11199,6428,11435,7255,24660,0.035365,0.979362,-236
491,"Little Rock-North Little Rock-Conway, AR Metro...",749290,16279,804,356,16126,31463,1147,26729,24751,25412,9810,6731,10118,7156,30780,0.017455,0.969559,-308
90,"Birmingham-Hoover, AL Metro Area",1114368,22123,913,702,21594,42794,1318,38952,36549,34373,15058,9063,15931,9210,13820,0.014993,0.945201,-873
47,"Atlanta-Sandy Springs-Alpharetta, GA Metro Area",6094752,88714,17250,15623,88610,174388,33665,221065,208397,209499,92900,62119,98612,65185,12060,0.059224,0.942076,-5712
406,"Jackson, MS Metro Area",591397,8283,524,272,8000,17152,792,20246,19390,19257,8578,6527,9222,6828,27140,0.020392,0.930167,-644
185,"Columbia, SC Metro Area",831913,14469,846,728,14295,27463,1385,28529,26670,25370,11915,7892,12835,7245,17900,0.020872,0.928321,-920
158,"Charleston-North Charleston, SC Metro Area",803398,20195,614,653,19748,36216,1155,31667,30525,29727,12408,7842,13371,9178,16700,0.011127,0.927978,-963
