In [1]:
import pandas as pd
from pathlib import Path

# Notebook-wide pandas configuration, set once before any data is loaded.
pd.set_option("mode.copy_on_write", True)  # opt in to pandas' copy-on-write mode
pd.set_option("future.infer_string", True)  # opt in to the future string-dtype inference
pd.set_option("plotting.backend", "plotly")  # use plotly as the pandas plotting backend

In [8]:
def clean_chs_data(raw):
    """Build a cleaned frame from the raw CHS data, indexed by child and year.

    The result contains 'momid' and 'age' cast to nullable UInt32, followed by
    the columns 'bpiA' to 'bpiE' as returned by `_clean_bpi`. Its index has two
    levels, taken from the 'childid' and 'year' columns of `raw`, both cast to
    nullable UInt32. Nothing is assigned to `raw` itself.

    Args:
        raw (pd.DataFrame): Raw data to be cleaned. Must provide the columns
            'momid', 'age', 'childid', 'year' and 'bpiA' to 'bpiE'.

    Returns:
        pd.DataFrame: The cleaned data.
    """
    uint_dtype = pd.UInt32Dtype()

    # Start from an empty frame on raw's index so every assigned column lines up with raw.
    cleaned = pd.DataFrame(index=raw.index)
    for name in ("momid", "age"):
        cleaned[name] = raw[name].astype(uint_dtype)
    for name in ("bpiA", "bpiB", "bpiC", "bpiD", "bpiE"):
        cleaned[name] = _clean_bpi(raw[name])

    # The index keys come straight from raw; they are never columns of the result.
    index_keys = [raw[name].astype(uint_dtype) for name in ("childid", "year")]
    return cleaned.set_index(index_keys)

def _clean_bpi(sr):
    """Replace BPI values '-100' with pd.NA."""
    sr = sr.astype(pd.Float32Dtype())
    sr = sr.replace({-100.000000: pd.NA})
    return sr

In [10]:
# Paths are resolved against the current working directory; the raw data
# is read from the Stata file in its "bld" subfolder.
this_file_dir = Path(".").resolve()
bld = this_file_dir.joinpath("bld")
raw = pd.read_stata(bld.joinpath("chs_data.dta"))

In [11]:
# Apply the cleaning function to the raw frame; the result is bound to `df`.
df = clean_chs_data(raw)

In [12]:
# Show the dtype of every column of the cleaned frame (index levels are not listed).
df.dtypes

momid     UInt32
age       UInt32
bpiA     Float32
bpiB     Float32
bpiC     Float32
bpiD     Float32
bpiE     Float32
dtype: object

In [20]:
# Display the cleaned frame; the rendered output elides the middle rows.
df

Unnamed: 0_level_0,Unnamed: 1_level_0,momid,age,bpiA,bpiB,bpiC,bpiD,bpiE
childid,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
201,1993,2,0,,,,,
201,1994,2,1,,,,,
201,1996,2,3,,,,,
201,1998,2,5,0.954977,0.252469,0.761378,-0.103273,0.547242
201,2000,2,7,,-0.305677,1.376603,-0.060863,-0.735066
...,...,...,...,...,...,...,...,...
1255601,1983,12556,5,,,,,
1255601,1985,12556,7,-0.42874,-1.727758,-1.045441,-0.696726,-0.735066
1255601,1987,12556,9,0.249955,-2.236943,-1.621489,0.52778,0.59118
1255601,1989,12556,11,,,,,
