# Extract IPUMS Mozambique Datasets

In [31]:
# Import Packages
import os
from pathlib import Path
from ipumspy import IpumsApiClient, MicrodataExtract, readers, ddi

In [None]:
# Set up variables
# IPUMS API key: read from the IPUMS_API_KEY environment variable when it is set, so a real
# key never has to be saved in the notebook; otherwise the placeholder below is used.
API_KEY = os.environ.get("IPUMS_API_KEY", "#####")
# Directory the extract files are downloaded into and later read back from.
DOWNLOAD_DIR = Path(r"#####")

# API client used by the cells below to submit, wait for and download the extract.
# Fix: this previously passed the undefined name `API_Key`, which raised NameError.
ipums = IpumsApiClient(API_KEY)

In [None]:
# Variables requested in the extract, organised by topic group.
# The selection is expected to be trimmed over time.
_variable_groups = {
    "Tech Households": ["PERSONS"],
    "Group Quarters": ["GQ", "GQTYPE", "UNREL"],
    "Global Geography": ["URBAN", "POPDENSGEO2"],
    "National Geography": ["GEO1_MZ", "GEO2_MZ"],
    "Household Economic": ["OWNERSHIP"],
    "Utilities": ["ELECTRIC", "WATSUP", "PHONE", "CELL", "INTERNET", "TRASH"],
    "Appliances": [
        "AUTOS", "MOTORCYCLE", "BIKE", "COMPUTER", "REFRIG", "STOVE", "TV", "RADIO",
    ],
    "Dwelling Characteristics": ["ROOMS", "BEDROOMS", "TOILET", "FLOOR", "WALL", "ROOF"],
    "Other Household": ["MORTNUM", "ANYMORT"],
    "Constructed Household": ["HHTYPE", "NFAMS", "NCOUPLES", "NMOTHERS", "NFATHERS"],
    "Constructed Family": ["FAMSIZE", "NCHILD", "NCHLT5", "ELDCH", "YNGCH"],
    "Demographic": ["RELATE", "AGE", "SEX", "MARST", "CONSENS"],
    "Fertility and Mortality": [
        "CHBORN", "CHSURV", "CHBORNF", "CHBORNM", "CHSURVF", "CHSURVM",
        "BIRTHSLYR", "BIRTHSURV", "MORTMOT", "MORTFAT",
    ],
    "Nativity and Birthplace": [
        "NATIVITY", "CITIZEN", "NATION", "BTHCERT", "BPL1_MZ", "BPL2_MZ",
    ],
    "Ethnicity and Language": ["RELIGION", "RACE", "SPEAKPORT", "LANGMZ", "MTONGMZ"],
    "Education": ["SCHOOL", "LIT", "EDATTAIN"],
    "Work": ["EMPSTAT", "LABFORCE", "EMPSECT"],
    "Occupation, Industry": ["OCCISCO", "ISCO08A", "ISCO88A", "INDGEN"],
    # Only these two migration variables are requested; the remaining ones would
    # likely be too correlated with them to be interesting (AH)
    "Global Migration": ["MIGRATE1", "MIGRATE5"],
    "Disability": [
        "DISABLED", "DISEMP", "DISBLND", "DISDEAF", "DISMUTE",
        "DISLOWR", "DISUPPR", "DISMOBIL", "DISMNTL", "DISORIG",
    ],
}

# Flatten the groups into the single ordered list that is passed to the extract definition.
variables = [
    variable_name
    for group_members in _variable_groups.values()
    for variable_name in group_members
]

In [None]:
# Define the extract request: the "ipumsi" collection, three Mozambique samples
# (mz1997a, mz2007a, mz2017a) and the variable list assembled in `variables`.
extract = MicrodataExtract(
    collection="ipumsi",
    samples=["mz1997a", "mz2007a", "mz2017a"],
    variables=variables,
    description="Data Mining MZ Project",
)

In [42]:
# Send the extract definition to IPUMS and report the ID found on the extract object afterwards
ipums.submit_extract(extract)
extract_id = extract.extract_id
print(f"Extract ID: {extract_id}")

# Block until IPUMS has finished processing the extract.
# Processing time on the IPUMS servers can be long and may vary between runs.
ipums.wait_for_extract(extract)

# Fetch the finished extract's files into DOWNLOAD_DIR
ipums.download_extract(extract, download_dir=DOWNLOAD_DIR)

Extract ID: 13


In [43]:
# Load the DDI codebook (XML) for this extract.
# The file name is built from the collection prefix and the extract ID zero-padded to five digits.
ddi_filename = f"ipumsi_{extract.extract_id:05d}.xml"
ddi_file = os.path.join(DOWNLOAD_DIR, ddi_filename)
# NOTE(review): this rebinds `ddi`, hiding the `ddi` module imported from ipumspy at the top
# of the notebook. The name is kept so that any later cell referring to `ddi` still works.
ddi = readers.read_ipums_ddi(ddi_file)

# Load the microdata; the data file name is taken from the codebook's file description
data_path = DOWNLOAD_DIR / ddi.file_description.filename
ipums_df = readers.read_microdata(ddi, data_path)
print(ipums_df.shape)
ipums_df.head()

See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.


(6283068, 103)


Unnamed: 0,COUNTRY,YEAR,SAMPLE,SERIAL,PERSONS,HHWT,GQ,GQTYPE,UNREL,URBAN,POPDENSGEO2,GEO1_MZ,GEO2_MZ,OWNERSHIP,OWNERSHIPD,ELECTRIC,WATSUP,PHONE,CELL,INTERNET,TRASH,AUTOS,MOTORCYCLE,BIKE,COMPUTER,REFRIG,STOVE,TV,RADIO,ROOMS,BEDROOMS,TOILET,FLOOR,WALL,ROOF,MORTNUM,ANYMORT,HHTYPE,NFAMS,NCOUPLES,...,BIRTHSLYR,BIRTHSURV,MORTMOT,MORTFAT,NATIVITY,CITIZEN,NATION,BTHCERT,BPL1_MZ,BPL2_MZ,RELIGION,RELIGIOND,RACE,SPEAKPORT,LANGMZ,MTONGMZ,SCHOOL,LIT,EDATTAIN,EDATTAIND,EMPSTAT,EMPSTATD,LABFORCE,EMPSECT,OCCISCO,ISCO08A,ISCO88A,INDGEN,MIGRATE1,MIGRATE5,DISABLED,DISEMP,DISBLND,DISDEAF,DISMUTE,DISLOWR,DISUPPR,DISMOBIL,DISMNTL,DISORIG
0,508,1997,508199701,1000,5,10.0,10,999,0,2,26.32,508001,508001001,,,2,20,,,,,,,,,,,,2,2,2,22,,,45,,,3,1,1,...,9,9,1,,2,1,11100,,508097,508097097,,,20,1,,,4,1,1,110,1,110,2,22,6,,621,10,11,20,2,2,,,,,,,2,
1,508,1997,508199701,1000,5,10.0,10,999,0,2,26.32,508001,508001001,,,2,20,,,,,,,,,,,,2,2,2,22,,,45,,,3,1,1,...,0,9,2,,1,1,11100,,508001,508001001,,,20,2,,,4,1,1,110,1,110,2,22,6,,621,10,11,11,2,2,,,,,,,2,
2,508,1997,508199701,1000,5,10.0,10,999,0,2,26.32,508001,508001001,,,2,20,,,,,,,,,,,,2,2,2,22,,,45,,,3,1,1,...,9,9,1,,1,1,11100,,508001,508001001,,,20,2,,,4,1,1,110,0,0,9,0,99,,999,0,11,11,2,9,,,,,,,2,
3,508,1997,508199701,1000,5,10.0,10,999,0,2,26.32,508001,508001001,,,2,20,,,,,,,,,,,,2,2,2,22,,,45,,,3,1,1,...,9,9,1,,1,1,11100,,508001,508001001,,,20,2,,,4,1,1,110,1,110,2,21,5,,522,60,11,11,2,2,,,,,,,2,
4,508,1997,508199701,1000,5,10.0,10,999,0,2,26.32,508001,508001001,,,2,20,,,,,,,,,,,,2,2,2,22,,,45,,,3,1,1,...,9,9,1,,1,1,11100,,508001,508001001,,,20,2,,,4,1,1,110,1,110,2,21,5,,522,60,11,11,2,2,,,,,,,2,
