<a href="https://colab.research.google.com/github/alchemistcohen/DATA/blob/main/ExtractDataFromCensus.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import requests
import pandas as pd

#Extract Data from Census USA web page using API

# API key (Request in Census web page  https://api.census.gov/data/key_signup.html )
api_key = '1e14012e70bb47a0d785cf3a7019c7994072e3eb'

# Base URL for the Census API (Look for it https://www.census.gov/data/developers/updates/new-discovery-tool.html)
base_url = 'https://api.census.gov/data/2022/acs/acs1/pums'

# Define the variables you want to fetch (Look if the variables area available for the url used)
variables = ["POBP", "SEX", "ST"]

params = {
    'get': ','.join(variables),
    'key': api_key
}

try:
    response = requests.get(base_url, params=params)
    response.raise_for_status()  # Raises an HTTPError for bad responses

    # Print the response content to understand what is being returned
    print("Response Status Code:", response.status_code)
    print("Response Content-Type:", response.headers['Content-Type'])
    print("Response Text:", response.text[:500])  # Print the first 500 characters

    # Check if the response content is empty
    if not response.text.strip():
        raise ValueError("Empty response from API")

    if 'json' not in response.headers['Content-Type']:
        raise ValueError("Expected JSON response but got a different format. Check your API key and parameters.")

    data = response.json()

    columns = data[0]
    rows = data[1:]
    df = pd.DataFrame(rows, columns=columns)

    df.to_csv('CensusData1.csv', index=False)

    print("Data saved to CensusData1.csv")

    print(df.head())

except requests.exceptions.HTTPError as http_err:
    print(f"HTTP error occurred: {http_err}")
except requests.exceptions.RequestException as req_err:
    print(f"Request error occurred: {req_err}")
except ValueError as val_err:
    print(f"Value error: {val_err}")
except json.JSONDecodeError as json_err:
    print(f"JSON decode error: {json_err}")
    print(response.text)  # Print the response content for debugging



Response Status Code: 200
Response Content-Type: application/json;charset=utf-8
Response Text: [["POBP","SEX","ST"],
["323","2","20"],
["012","1","12"],
["026","1","26"],
["029","2","17"],
["036","2","36"],
["020","1","12"],
["037","1","37"],
["004","1","04"],
["035","1","35"],
["048","1","48"],
["029","1","20"],
["001","1","12"],
["039","1","39"],
["006","1","32"],
["012","1","45"],
["017","1","17"],
["017","1","19"],
["042","1","42"],
["012","1","12"],
["048","2","08"],
["051","1","51"],
["039","2","39"],
["048","1","48"],
["048","1","48"],
["006","1","40"],
["037","1","13"],
["020","2"
Data saved to CensusData1.csv
  POBP SEX  ST
0  323   2  20
1  012   1  12
2  026   1  26
3  029   2  17
4  036   2  36


In [4]:
#Changing Codes to correspondent words

# Mapping for state codes to state names
state_mapping = {
    "01": "Alabama", "49": "Utah", "21": "Kentucky", "26": "Michigan", "29": "Missouri",
    "32": "Nevada", "34": "New Jersey", "08": "Colorado", "51": "Virginia", "39": "Ohio",
    "02": "Alaska", "46": "South Dakota", "04": "Arizona", "06": "California", "55": "Wisconsin",
    "15": "Hawaii", "22": "Louisiana", "30": "Montana", "47": "Tennessee", "48": "Texas",
    "09": "Connecticut", "50": "Vermont", "53": "Washington", "17": "Illinois", "20": "Kansas",
    "72": "Puerto Rico", "35": "New Mexico", "36": "New York", "10": "Delaware",
    "11": "District of Columbia", "12": "Florida", "56": "Wyoming", "16": "Idaho",
    "25": "Massachusetts", "27": "Minnesota", "42": "Pennsylvania", "45": "South Carolina",
    "13": "Georgia", "23": "Maine", "24": "Maryland", "28": "Mississippi", "37": "North Carolina",
    "41": "Oregon", "05": "Arkansas", "19": "Iowa", "31": "Nebraska", "33": "New Hampshire",
    "44": "Rhode Island", "54": "West Virginia", "18": "Indiana", "38": "North Dakota", "40": "Oklahoma"
}

# Mapping for sex codes
sex_mapping = {
    "1": "Male",
    "2": "Female"
}

# Mapping for POBP codes to their corresponding words
pobp_mapping = {
    "300": "Bermuda",
    "421": "Ghana",
    "429": "Liberia",
    "205": "Myanmar",
    "449": "South Africa",
    "211": "Indonesia",
    "333": "Jamaica",
    "460": "Zambia",
    "341": "Trinidad and Tobago",
    "360": "Argentina",
    "120": "Italy",
    "005": "Arkansas",
    "368": "Guyana",
    "249": "Asia",
    "370": "Peru",
    "130": "Azores Islands",
    "010": "Delaware",
    "373": "Venezuela",
    "154": "Serbia",
    "034": "New Jersey",
    "036": "New York",
    "157": "Lithuania",
    "159": "Azerbaijan",
    "042": "Pennsylvania",
    "044": "Rhode Island",
    "054": "West Virginia",
    "501": "Australia",
    "508": "Fiji",
    "515": "New Zealand",
    "323": "Bahamas",
    "447": "Sierra Leone",
    "328": "Dominica",
    "451": "Sudan",
    "457": "Uganda",
    "463": "South Sudan",
    "468": "Other Africa, Not Specified",
    "469": "Eastern Africa, Not Specified",
    "108": "Finland",
    "109": "France",
    "233": "Philippines",
    "235": "Saudi Arabia",
    "119": "Ireland",
    "365": "Ecuador",
    "245": "United Arab Emirates",
    "127": "Norway",
    "015": "Hawaii",
    "136": "Sweden",
    "016": "Idaho",
    "137": "Switzerland",
    "021": "Kentucky",
    "142": "Northern Ireland",
    "399": "Americas, Not Specified",
    "158": "Armenia",
    "072": "Puerto Rico",
    "314": "Honduras",
    "200": "Afghanistan",
    "321": "Antigua and Barbuda",
    "442": "Rwanda",
    "202": "Bangladesh",
    "206": "Cambodia",
    "454": "Togo",
    "459": "Democratic Republic of Congo",
    "222": "Kuwait",
    "102": "Austria",
    "228": "Mongolia",
    "117": "Hungary",
    "238": "Sri Lanka",
    "239": "Syria",
    "242": "Thailand",
    "248": "Yemen",
    "369": "Paraguay",
    "009": "Connecticut",
    "372": "Uruguay",
    "018": "Indiana",
    "028": "Mississippi",
    "161": "Georgia",
    "045": "South Carolina",
    "168": "Montenegro",
    "048": "Texas",
    "169": "Other Europe, Not Specified",
    "066": "Guam",
    "416": "Ethiopia",
    "301": "Canada",
    "311": "Costa Rica",
    "312": "El Salvador",
    "554": "Other US Island Areas, Oceania, Not Specified, or at Sea",
    "316": "Panama",
    "448": "Somalia",
    "329": "Dominican Republic",
    "209": "Hong Kong",
    "453": "Tanzania",
    "218": "Kazakhstan",
    "340": "St. Vincent and the Grenadines",
    "462": "Africa",
    "224": "Lebanon",
    "110": "Germany",
    "361": "Bolivia",
    "243": "Turkey",
    "002": "Alaska",
    "126": "Netherlands",
    "011": "District of Columbia",
    "253": "South Central Asia, Not Specified",
    "374": "South America",
    "013": "Georgia",
    "140": "Scotland",
    "022": "Louisiana",
    "025": "Massachusetts",
    "026": "Michigan",
    "027": "Minnesota",
    "030": "Montana",
    "156": "Latvia",
    "163": "Russia",
    "053": "Washington",
    "511": "Marshall Islands",
    "423": "Guinea",
    "313": "Guatemala",
    "440": "Nigeria",
    "444": "Senegal",
    "210": "India",
    "332": "Haiti",
    "212": "Iran",
    "217": "Korea",
    "338": "St. Kitts-Nevis",
    "339": "St. Lucia",
    "219": "Kyrgyzstan",
    "223": "Laos",
    "467": "Western Africa, Not Specified",
    "105": "Czechoslovakia",
    "001": "Alabama",
    "364": "Colombia",
    "008": "Colorado",
    "129": "Portugal",
    "134": "Spain",
    "139": "England",
    "019": "Iowa",
    "150": "Bosnia and Herzegovina",
    "152": "Macedonia",
    "033": "New Hampshire",
    "037": "North Carolina",
    "039": "Ohio",
    "160": "Belarus",
    "162": "Moldova",
    "055": "Wisconsin",
    "078": "US Virgin Islands",
    "400": "Algeria",
    "412": "Congo",
    "417": "Eritrea",
    "303": "Mexico",
    "425": "Ivory Coast",
    "203": "Bhutan",
    "324": "Barbados",
    "207": "China",
    "330": "Grenada",
    "213": "Iraq",
    "215": "Japan",
    "216": "Jordan",
    "464": "Northern Africa, Not Specified",
    "106": "Denmark",
    "004": "Arizona",
    "247": "Vietnam",
    "012": "Florida",
    "254": "Other Asia, Not Specified",
    "017": "Illinois",
    "024": "Maryland",
    "148": "Czech Republic",
    "151": "Croatia",
    "038": "North Dakota",
    "165": "USSR",
    "166": "Europe",
    "046": "South Dakota",
    "167": "Kosovo",
    "049": "Utah",
    "050": "Vermont",
    "056": "Wyoming",
    "527": "Samoa",
    "408": "Cabo Verde",
    "414": "Egypt",
    "420": "Gambia",
    "427": "Kenya",
    "430": "Libya",
    "436": "Morocco",
    "327": "Cuba",
    "461": "Zimbabwe",
    "100": "Albania",
    "343": "West Indies",
    "344": "Caribbean, Not Specified",
    "103": "Belgium",
    "226": "Malaysia",
    "236": "Singapore",
    "118": "Iceland",
    "363": "Chile",
    "246": "Uzbekistan",
    "006": "California",
    "128": "Poland",
    "023": "Maine",
    "149": "Slovakia",
    "041": "Oregon",
    "164": "Ukraine",
    "047": "Tennessee",
    "060": "American Samoa",
    "512": "Micronesia",
    "523": "Tonga",
    "310": "Belize",
    "315": "Nicaragua",
    "214": "Israel",
    "104": "Bulgaria",
    "229": "Nepal",
    "231": "Pakistan",
    "116": "Greece",
    "240": "Taiwan",
    "362": "Brazil",
    "132": "Romania",
    "138": "United Kingdom, Not Specified",
    "020": "Kansas",
    "147": "Yugoslavia",
    "029": "Missouri",
    "031": "Nebraska",
    "032": "Nevada",
    "035": "New Mexico",
    "040": "Oklahoma",
    "051": "Virginia",
    "069": "Commonwealth of the Northern Mariana Islands",
    "407": "Cameroon"
}




# Load the data into a pandas DataFrame
df = pd.read_csv('CensusData1.csv')


# Save or display the DataFrame
df.to_csv('CensusDataMod.csv', index=False)  # Optionally save to a CSV
print(df.head())  # Display the first few rows

# Replace the state codes with state names
df['ST'] = df['ST'].apply(lambda x: state_mapping.get(str(x), "Other"))

# Replace the sex codes with descriptions
df['SEX'] = df['SEX'].apply(lambda x: sex_mapping.get(str(x), "Other"))

# Replace the POBP codes with corresponding words
df['POBP'] = df['POBP'].apply(lambda x: pobp_mapping.get(str(x), "Other"))

# Save the modified DataFrame back to a CSV or use it for further processing
df.to_csv('CensusDataMod.csv', index=False)

# Print the first rows to check the result
print(df.head())


   POBP  SEX  ST
0   323    2  20
1    12    1  12
2    26    1  26
3    29    2  17
4    36    2  36
      POBP     SEX        ST
0  Bahamas  Female    Kansas
1    Other    Male   Florida
2    Other    Male  Michigan
3    Other  Female  Illinois
4    Other  Female  New York
