In [28]:
# Write your imports here, so you can easily find and run them

from glob import glob

In [29]:
##################################################################################
def find_student_records( directory, extension = 'txt' ):
    """
    Return all roster filenames ending in 'extension' inside 'directory',
    sorted alphabetically.

    The search pattern is directory + '/*' + extension, so 'extension' is
    matched as a plain filename suffix (no '.' is inserted automatically).

    input:
        directory - str, Directory that contains the roster files
        extension - str, defaults to 'txt'
    output:
        filenames - list, Sorted list of roster filenames in directory
                    (empty if nothing matches)
    """

    pattern = directory + '/*' + extension

    # glob() makes no promise about the order of its results (it follows the
    # underlying directory listing), so sort to make indexing into the
    # returned list reproducible across machines and runs.
    filenames = sorted( glob( pattern ) )

    return filenames

# Spot checks against the roster data set: they need ../Data/Roster to exist
# relative to the notebook, and they rely on find_student_records returning
# the files in a stable order with these two entries first.
assert find_student_records('../Data/Roster')[0] == '../Data/Roster/Agatha_Bailey_798.txt'
assert find_student_records('../Data/Roster')[1] == '../Data/Roster/Agatha_Brooks_78.txt'



In [30]:
##################################################################################
def clean_dob(dob_string):
    '''
    Takes a date string of "M/D/YY" or "M/D/YYYY" and converts it to the month, day, and year parts as integers
    Returns those integer dates in a dictionary.

    Two-digit years are interpreted as 19YY. Leading/trailing whitespace
    around the whole string is ignored.

    input:
        * dob_string - str, birthday string of form "M/D/YY" or "M/D/YYYY"
    output:
        * dob - dictionary with keys month, day, and year (all int), or the
                string 'There is a problem with input in clean_dob' when the
                input does not have exactly three '/'-separated parts or the
                year is not 2 or 4 characters long
    raises:
        * ValueError - if a part that should be a number is not numeric
    '''
    error_message = 'There is a problem with input in clean_dob'

    # Trim first: a stray '\r' or space after the year would otherwise change
    # the year's length and make a valid date look malformed.
    date_parts = dob_string.strip().split('/')

    # Anything other than month/day/year is reported the same way as a bad
    # year instead of surfacing as an IndexError (or being silently accepted).
    if len(date_parts) != 3:
        return error_message

    month_text, day_text, year_text = date_parts

    # Remember date is ordered as month, day, year
    date_dict = {}
    date_dict['month'] = int( month_text )
    date_dict['day'] = int( day_text )

    if len(year_text) == 2:
        # Two-digit years are assumed to fall in the 1900s
        date_dict['year'] = 1900 + int( year_text )
    elif len(year_text) == 4:
        date_dict['year'] = int( year_text )
    else:
        return error_message

    return date_dict

# Sanity checks for clean_dob: leading zeros are accepted, a four-digit year
# is kept as-is, and a three-character year yields the error-message string
# rather than a dictionary.
assert clean_dob('07/12/68')['day'] == 12
assert clean_dob('7/12/68')['month'] == 7
assert clean_dob('07/12/1968')['year'] == 1968
assert clean_dob('7/12/118') == 'There is a problem with input in clean_dob'

In [31]:
##################################################################################
def calculate_age( birth_date, current_date ):
    """
    Work out how old someone born on birth_date is on current_date.

    The age is the difference in calendar years, reduced by one when the
    birthday has not yet occurred in the current year.

    input:
        * birth_date - dictionary with keys day, month, year
        * current_date - dictionary with keys day, month, year
    output:
        * age - integer
    """
    year_gap = current_date['year'] - birth_date['year']

    # The birthday is still ahead if we are in an earlier month, or in the
    # birth month but on an earlier day.
    birthday_pending = (
        current_date['month'] < birth_date['month']
        or ( current_date['month'] == birth_date['month']
             and current_date['day'] < birth_date['day'] )
    )

    return year_gap - 1 if birthday_pending else year_gap

# Sanity checks for calculate_age (dates built with clean_dob): the two-digit
# and four-digit spellings of 1968 both give 47 on 9/10/2015, where the
# birthday has already passed that year; an 1868 birth date gives 147.
assert calculate_age( clean_dob('7/12/68'), clean_dob('9/10/2015') ) == 47
assert calculate_age( clean_dob('7/12/1968'), clean_dob('9/10/2015') ) == 47
assert calculate_age( clean_dob('7/12/1868'), clean_dob('9/10/2015') ) == 147

In [45]:
##################################################################################
def parse_student_record(filename):
    '''
    Parses a student record file into a dictionary

    Each line is expected to look like "<attribute>:\\t<value>". Lines that
    do not split into exactly two parts on ':\\t' are ignored. The
    'Date of Birth' value is converted with clean_dob and the 'Weight' value
    with clean_weight; every other value is stored as the raw string. Both
    helpers must already be defined when this function is called.

    input:
        filename - str, path to the file
    output:
        data - dict, student attribute data
    '''

    data = {}

    # The with-block closes the file even if one of the cleaners raises.
    with open(filename) as input_file:
        for line in input_file:
            split_line = line.strip('\n').split(':\t')

            # make sure the line has the correct number of parts
            if len(split_line) != 2:
                continue

            attribute, value = split_line

            # Let's clean the data
            if attribute == 'Date of Birth':
                data[attribute] = clean_dob( value )
            elif attribute == 'Weight':
                data[attribute] = clean_weight( value )
            else:
                data[attribute] = value

    return data


# Demo: parse one roster file and show the resulting dictionary.
# NOTE(review): parse_student_record calls clean_weight, which is defined in a
# later cell of this notebook. After a kernel restart that cell has to be run
# first, otherwise this call raises NameError for a record with a 'Weight' line.
print(parse_student_record('../Data/Roster/Agatha_Bailey_798.txt'))

{'Weight': 220, 'Email Address': 'agatha.bailey@northwestern.edu', 'Department': 'Engineering', 'Height': '6ft,0in', 'Name': 'Agatha A. Bailey', 'Date of Birth': {'year': 1975, 'day': 10, 'month': 1}, 'Favorite Color': 'Lime', 'Zodiac Sign': 'January', 'Favorite Animal': 'Turtle'}


In [41]:
def clean_weight(word):
    '''
    Convert a weight string such as "220lbs" into a whole number of pounds.
    input:
        * word - str, digits followed by the unit "lbs"
    output:
        * weight - int, the weight in pounds
    '''
    # str.strip with an argument removes any run of the characters 'l', 'b'
    # and 's' from both ends (not the literal suffix "lbs"). int() tolerates
    # leftover surrounding whitespace, so "25 lbs" is parsed as well.
    number_text = word.strip('lbs')

    return int( number_text )
    
# Sanity checks for clean_weight: the unit may follow the number directly or
# after a space.
assert clean_weight('55lbs') == 55
assert clean_weight('25 lbs') == 25