In [48]:
import numpy as np
import sys
import nsfg
import thinkstats2
from collections import defaultdict

def ReadFemResp(dct_file='2002FemResp.dct',
                dat_file='2002FemResp.dat.gz',
                nrows=None):
    """Reads the NSFG respondent data.
    dct_file: string file name
    dat_file: string file name
    returns: DataFrame
    """
    dct = thinkstats2.ReadStataDct(dct_file)
    df = dct.ReadFixedWidth(dat_file, compression='gzip', nrows=nrows)
    """CleanFemResp(df)"""
    return df

def ReadFemPreg(dct_file='2002FemPreg.dct',
                dat_file='2002FemPreg.dat.gz'):
    """Reads the NSFG pregnancy data.

    dct_file: string file name
    dat_file: string file name

    returns: DataFrame
    """
    dct = thinkstats2.ReadStataDct(dct_file)
    df = dct.ReadFixedWidth(dat_file, compression='gzip')
    """CleanFemPreg(df)"""
    return df

def MakePregMap(df):
    """Make a map from caseid to list of preg indices.

    df: DataFrame

    returns: dict that maps from caseid to list of indices into `preg`
    """
    d = defaultdict(list)
    for index, caseid in df.caseid.iteritems():
        d[caseid]
    return d    
    
def ValidatePregnum(resp, preg):
    """Validate pregnum in the respondent file.

    resp: respondent DataFrame
    preg: pregnancy DataFrame
    """
    # make the map from caseid to list of pregnancy indices
    preg_map = MakePregMap(preg)
    
    # iterate through the respondent pregnum series
    for index, pregnum in resp.pregnum.iteritems():
        caseid = resp.caseid[index]
        indices = preg_map[caseid]

        # check that pregnum from the respondent file equals
        # the number of records in the pregnancy file
        if len(indices) != pregnum:
            print(caseid, len(indices), pregnum)
            return False
    return True

def main():
    resp = ReadFemResp()
    preg = ReadFemPreg()
    print(resp)
    # Print out Value Counts for Resp.pregnum
    print(resp.pregnum.value_counts().sort_index())
    # Cross-Validate Resp and Preg and compare pregnum with preg file
    assert(ValidatePregnum(resp, preg))
    
        
main()

      caseid  rscrinf  rdormres  rostscrn  rscreenhisp  rscreenrace  age_a  \
0       2298        1         5         5            1          5.0     27   
1       5012        1         5         1            5          5.0     42   
2      11586        1         5         1            5          5.0     43   
3       6794        5         5         4            1          5.0     15   
4        616        1         5         4            1          5.0     20   
5        845        1         5         4            1          5.0     42   
6      10333        5         5         3            1          5.0     17   
7        855        5         5         4            5          5.0     22   
8       8656        5         5         4            1          5.0     38   
9       3566        5         5         4            5          5.0     21   
10      5917        1         5         3            1          5.0     44   
11      9200        5         5         3            1          

AssertionError: 