In [1]:
# Names of the top-level fields of a record, paired position-by-position with
# the entries of crime_offsets below.
crime_headers = [
    "STATE_NUM",
    "AGENCY",
    "G",
    "MO",
    "HOM",
    "INC_NUM",
    "SIT",
    "VICTIMS",
    "OFFENDERS",
    "AGENCY NAME",
    "STATE",
]

# Names of the sub-fields inside the OFFENDERS group.
offender_headers = [
    "OA",
    "OS",
    "OR",
    "OE",
    "OWEAP",
    "OREL",
    "OCIR",
    "OSUB",
]

# Names of the sub-fields inside the VICTIMS group.
victim_headers = [
    "VA",
    "VS",
    "VR",
    "VE",
]

# Start offset of every field. The two nested lists hold the start offsets of
# the VICTIMS and OFFENDERS sub-fields respectively; a field ends where the
# next one starts.
crime_offsets = [
    4,
    7,
    15,
    17,
    21,
    26,
    32,
    [37, 41, 44, 47],
    [51, 55, 58, 61, 65, 71, 76, 81],
    85,
    111,
]

def create_tuples(l):
    '''
    Helper function for getting offsets.

    Turns a list of field start offsets into (start, end) pairs, where each
    field ends at the start of the field that follows it. An entry of the
    input may itself be a list of start offsets (a group of sub-fields); such
    an entry yields a list of (start, end) pairs, the last of which ends at
    the start of whatever follows the group.

    The final entry of the input only serves as the end of the previous field
    and produces no output of its own, so the result has one element fewer
    than the input (and is empty for inputs of length 0 or 1).
    '''
    spans = []
    # Walk the entries pairwise: (current entry, the entry right after it).
    for current, following in zip(l, l[1:]):
        if type(current) is int:
            # Flat field: ends at the next flat offset, or at the first
            # offset of the next group.
            end = following if type(following) is int else following[0]
            spans.append((current, end))
            continue

        # Group of sub-fields (victim / offender style entry).
        group = []
        last = len(current) - 1
        for pos, start in enumerate(current):
            if pos < last:
                stop = current[pos + 1]
            elif type(following) is int:
                stop = following
            else:
                stop = following[0]
            group.append((start, stop))
        spans.append(group)
    return spans

def create_dict(positions, headers):
    '''
    Helper function for getting offsets.

    Pairs the i-th entry of positions with the i-th header string and returns
    the result as a dictionary {header: position}. Headers beyond the length
    of positions are ignored; positions beyond the length of headers raise
    IndexError.
    '''
    # Index into headers (rather than zip) so that a too-short header list
    # still fails loudly with IndexError.
    return {headers[idx]: position for idx, position in enumerate(positions)}

def get_offsets_full(crime_offsets, crime_headers, victim_headers, offender_headers, line):
    '''
    Build the nested {field name: (start, end)} offsets dictionary for a line.

    crime_offsets    -- list of field start offsets; the entries belonging to
                        the 'VICTIMS' and 'OFFENDERS' headers are themselves
                        lists of sub-field start offsets.
    crime_headers    -- one header per entry of crime_offsets; must contain
                        'VICTIMS' and 'OFFENDERS'.
    victim_headers   -- headers for the sub-fields of 'VICTIMS'.
    offender_headers -- headers for the sub-fields of 'OFFENDERS'.
    line             -- the line being parsed; len(line) - 1 is used as the
                        end offset of the final field.

    Returns a dict keyed by crime_headers whose values are (start, end)
    tuples, except 'VICTIMS' and 'OFFENDERS', whose values are dicts keyed by
    victim_headers / offender_headers holding (start, end) tuples.

    Raises AssertionError if the number of computed fields does not match the
    number of crime_headers.
    '''
    # Build a new list instead of appending to crime_offsets in place:
    # mutating the caller's list would leave an extra trailing offset behind
    # after every call, so a second call with the same list would yield one
    # field too many and fail the length check below.
    offsets = [*crime_offsets, len(line) - 1]

    crime_tuples = create_tuples(offsets)

    assert len(crime_tuples) == len(crime_headers)
    crime_dict = create_dict(crime_tuples, crime_headers)

    # Replace the raw lists of sub-field spans with header-keyed dictionaries.
    crime_dict['VICTIMS'] = create_dict(crime_dict['VICTIMS'], victim_headers)
    crime_dict['OFFENDERS'] = create_dict(crime_dict['OFFENDERS'], offender_headers)

    return crime_dict

def process_line(offsets_dict, line):
    '''
    Slice one fixed-width line into a dictionary of field values.

    offsets_dict maps each field name either to a (start, end) pair or to a
    nested dictionary of the same form.

    Returns a new dictionary with the same keys. A (start, end) pair becomes
    the whitespace-stripped text of line[start:end], or None when nothing is
    left after stripping. A nested dictionary becomes a one-element list
    holding the processed sub-dictionary.
    '''
    record = {}
    for key, value in offsets_dict.items():
        # Recursive case. isinstance (rather than an exact type comparison)
        # so that dict subclasses such as OrderedDict are recursed into
        # instead of being mistaken for a (start, end) pair.
        if isinstance(value, dict):
            record[key] = [process_line(value, line)]
            continue

        # Base case: value is a (start, end) pair.
        text = line[value[0]:value[1]].strip()
        record[key] = None if text == '' else text
    return record

def full_line(crime_offsets, crime_headers, victim_headers, offender_headers, line):
    '''
    Parse a complete line into a dictionary of field values.

    Takes the list of crime offsets (with the victim and offender offsets as
    sublists), the three lists of headers and the full line. The offsets are
    first turned into a header-keyed offsets dictionary, which is then used
    to slice the line.
    Returns the complete dictionary for the line.
    '''
    return process_line(
        get_offsets_full(crime_offsets, crime_headers, victim_headers, offender_headers, line),
        line,
    )