In [6]:
# Import pandas
import pandas as pd

In [7]:
# Read the raw extract into a DataFrame and report its (rows, columns) shape.
# header=3 takes the column names from the fourth line of the file; the lines
# above it are skipped. low_memory=False parses the file in a single pass so
# column dtypes are inferred from the whole column.
df = pd.read_csv(
    'nyc_detailed-race-and-ethnicity-data_2020_core-geographies.csv',
    header=3,
    low_memory=False,
)

print(f"Data loaded: {df.shape}")

Data loaded: (2717, 370)


In [8]:
# Keep only census-tract rows whose GeoID starts with '36061' (Manhattan).
# GeoID is cast to str first so the prefix test works whatever dtype it was
# parsed as.
is_census_tract = df['GeoType'] == 'CT2020'
has_manhattan_prefix = df['GeoID'].astype(str).str.startswith('36061')
manhattan_tracts = df[is_census_tract & has_manhattan_prefix]

print(f"Manhattan census tracts found: {len(manhattan_tracts)}")

Manhattan census tracts found: 310


In [9]:
# Mapping from source column code -> human-readable column name.
# Insertion order matters: later cells select and rename columns in exactly
# this order. Keys are the codes as they appear in the source file header.
demographic_columns = dict(
    # --- Identification and overall population ---
    GeoID='geo_id',
    GeoName='geo_name',
    Pop='total_population',

    # --- Hispanic/Latino groups ---
    HMex='mexican_count',
    HCA='central_american_total',
    HCACstRcn='costa_rican_count',
    HCAGutmln='guatemalan_count',
    HCAHndrn='honduran_count',
    HCANcrgn='nicaraguan_count',
    HCAPnmn='panamanian_count',
    HCASlvdrn='salvadoran_count',
    HSA='south_american_total',
    HSAArgntn='argentinean_count',
    HSABlvn='bolivian_count',
    HSAChln='chilean_count',
    HSAClmbn='colombian_count',
    HSAEcudrn='ecuadorian_count',
    HSAPrguyn='paraguayan_count',
    HSAPrvn='peruvian_count',
    HSAUrgyn='uruguayan_count',
    HSAVnzuln='venezuelan_count',
    HCH='caribbean_hispanic_total',
    HCHCuban='cuban_count',
    HCHDmncn='dominican_count',
    HCHPrtRcn='puerto_rican_count',

    # --- European groups ---
    WEur='white_european_total',
    WEurIrsh='irish_count',
    WEurItln='italian_count',
    WEurGrmn='german_count',
    WEurPolish='polish_count',
    WEurRsn='russian_count',
    WEurEnglsh='english_count',
    WEurFrnch='french_count',

    # --- Asian groups ---
    AEA='east_asian_total',
    AEAChnsNoT='chinese_count',
    AEAJpns='japanese_count',
    AEAKrn='korean_count',
    AEATwns='taiwanese_count',
    ASA='south_asian_total',
    ASAAsnInd='asian_indian_count',
    ASABngldsh='bangladeshi_count',
    ASANpls='nepalese_count',
    ASAPkstn='pakistani_count',
    ASASrLnkn='sri_lankan_count',
    ASEA='southeast_asian_total',
    ASEAFlpn='filipino_count',
    ASEAVtnms='vietnamese_count',
    ASEAThai='thai_count',
    ASEACmbdn='cambodian_count',

    # --- Black/African groups ---
    BAfrAm='black_african_american_total',
    BSSAf='sub_saharan_african_total',
    BSSAfNgrn='nigerian_count',
    BSSAfGhn='ghanaian_count',
    BSSAfEthpn='ethiopian_count',
    BSSAfKnyn='kenyan_count',
    BCrb='black_caribbean_total',
    BCrbJmcn='jamaican_count',
    BCrbHtn='haitian_count',
    BCrbBrbdn='barbadian_count',
    BCrbTrTob='trinidadian_tobagonian_count',

    # --- Middle Eastern/North African ---
    WMENA='middle_eastern_north_african_total',
    WMENAArab='arab_count',
    WMENAEgptn='egyptian_count',
    WMENAIrn='iranian_count',
    WMENALbns='lebanese_count',

    # --- American Indian/Alaska Native ---
    AIANAlkNtv='alaska_native_count',
    AIANAIn='american_indian_total',

    # --- Pacific Islander ---
    # NOTE(review): 'NHPIPly' reads like a subgroup code rather than an overall
    # Pacific Islander total -- confirm against the data dictionary.
    NHPIPly='pacific_islander_total',
)

In [10]:
# Keep only the requested source columns that the dataset actually provides,
# preserving the order in which they are declared in `demographic_columns`.
columns = [col for col in demographic_columns if col in manhattan_tracts.columns]
print(f"Demographic columns available: {len(columns)}")

# Report requested columns that are absent instead of dropping them silently,
# so a mistyped column code or a change in the source schema is visible here.
missing_columns = [col for col in demographic_columns if col not in manhattan_tracts.columns]
if missing_columns:
    print(f"Demographic columns missing from data: {missing_columns}")

Demographic columns available: 67


In [None]:
# Take an independent copy of just the available columns, so later edits to
# `data` never touch `manhattan_tracts`, then report its shape.
data = manhattan_tracts.loc[:, columns].copy()
print(f"Data selected: {data.shape}")

Data selected: (310, 67)


In [12]:
# Replace the source column codes with their human-readable names.
# rename() ignores mapping keys that are not present in the frame, so the full
# mapping can be passed directly.
data = data.rename(columns=demographic_columns)

In [13]:
# Every column except the two identifier columns is treated as numeric.
numeric_columns = data.columns[~data.columns.isin(['geo_id', 'geo_name'])].tolist()

In [14]:
# Convert each count column to a numeric dtype: strip thousands separators and
# spaces from the text form, then parse. Anything that still fails to parse
# becomes NaN (errors='coerce').
for column_name in numeric_columns:
    as_text = data[column_name].astype(str)
    without_separators = as_text.str.replace(',', '').str.replace(' ', '')
    data[column_name] = pd.to_numeric(without_separators, errors='coerce')

In [15]:
# Columns whose NaN values will be filled: everything except the identifiers.
demographic_columns_final = list(
    filter(lambda name: name not in ('geo_id', 'geo_name'), data.columns)
)

In [17]:
# Replace missing values with zero in all demographic columns at once.
# NOTE(review): this also zeroes values that failed numeric parsing upstream;
# confirm that treating those as 0 is intended.
data[demographic_columns_final] = data[demographic_columns_final].fillna(0)
    

In [18]:
# Discard rows that have no tract identifier, then report the resulting size.
data = data[data['geo_id'].notna()]
print(f"Final dataset: {data.shape}")
print(f"Total columns: {data.shape[1]}")

Final dataset: (310, 67)
Total columns: 67


In [19]:
# Write the cleaned table to disk; index=False leaves the row index out of the
# CSV.
data.to_csv(
    path_or_buf='manhattan_race_ethnicity.csv',
    index=False,
)

In [None]:
# Headline figures: number of tract rows and the summed total population.
tract_count = len(data)
population_total = data['total_population'].sum()
print(f"Manhattan census tracts: {tract_count}")
print(f"Total Manhattan population: {population_total:,}")

Manhattan census tracts: 310
Total Manhattan population: 1,694,251


In [None]:
# Group columns to summarise: everything except the identifiers and the
# overall population column.
demographics = data.columns[
    ~data.columns.isin(['geo_id', 'geo_name', 'total_population'])
].tolist()
print(f"All demographic groups ({len(demographics)} total):")

All demographic groups (64 total):


In [23]:
# For every demographic group, print the summed head count and the number of
# tracts where that group has a count above zero.
for group in demographics:
    total = data[group].sum()
    tracts_with_pop = (data[group] > 0).sum()
    # Columns that held missing values are float64, so a bare `:,` format prints
    # head counts as "41,471.0". `:,.0f` prints them as whole numbers and works
    # for integer columns as well.
    print(f"  {group}: {total:,.0f} people across {tracts_with_pop} tracts")

  mexican_count: 41,471.0 people across 273 tracts
  central_american_total: 7,613.0 people across 53 tracts
  costa_rican_count: 70.0 people across 3 tracts
  guatemalan_count: 855.0 people across 25 tracts
  honduran_count: 2,692.0 people across 69 tracts
  nicaraguan_count: 425.0 people across 14 tracts
  panamanian_count: 470.0 people across 17 tracts
  salvadoran_count: 2,296.0 people across 57 tracts
  south_american_total: 37,373.0 people across 184 tracts
  argentinean_count: 3,253.0 people across 94 tracts
  bolivian_count: 0.0 people across 0 tracts
  chilean_count: 463.0 people across 16 tracts
  colombian_count: 9,763.0 people across 209 tracts
  ecuadorian_count: 12,793.0 people across 150 tracts
  paraguayan_count: 0.0 people across 0 tracts
  peruvian_count: 2,878.0 people across 90 tracts
  uruguayan_count: 0.0 people across 0 tracts
  venezuelan_count: 2,124.0 people across 67 tracts
  caribbean_hispanic_total: 252,999.0 people across 249 tracts
  cuban_count: 10,433.0