### Introduction to the Data: Births in the US

In [3]:
# Read the raw CSV text; the context manager closes the file handle as soon as
# the contents have been read instead of leaving it open for the kernel's lifetime.
with open("US_births_1994-2003_CDC_NCHS.csv", "r") as f:
    text = f.read()
lines = text.split("\n")
# Preview the header row and the first nine data rows.
print(lines[0:10])

['year,month,date_of_month,day_of_week,births', '1994,1,1,6,8096', '1994,1,2,7,7772', '1994,1,3,1,10142', '1994,1,4,2,11248', '1994,1,5,3,11053', '1994,1,6,4,11406', '1994,1,7,5,11251', '1994,1,8,6,8653', '1994,1,9,7,7910']


### Convert Data Into List of Lists
The data needs to be converted into a list of lists, where each nested list contains integer values. The header row needs to be removed.

In [4]:
def read_csv(csv_filename):
    """Read a comma-separated file of integers into a list of lists.

    The first line is treated as a header and dropped. Every remaining
    non-blank line is split on commas and each field is converted with
    ``int``.

    Args:
        csv_filename: Path of the CSV file to read.

    Returns:
        A list with one inner list of ints per data row, in file order.

    Raises:
        ValueError: If a field on a non-blank line cannot be parsed by ``int``.
        OSError: If the file cannot be opened.
    """
    # The context manager closes the file even if reading fails.
    with open(csv_filename, "r") as csv_file:
        data_rows = csv_file.read().split("\n")[1:]

    final_list = []
    for row in data_rows:
        # A trailing newline leaves an empty final element after the split;
        # int("") would raise, so blank lines are skipped.
        if not row.strip():
            continue
        final_list.append([int(value) for value in row.split(",")])

    return final_list

In [5]:
# Parse the CSV into a list of integer rows and preview the first ten.
cdc_list = read_csv("US_births_1994-2003_CDC_NCHS.csv")
print(cdc_list[:10])

[[1994, 1, 1, 6, 8096], [1994, 1, 2, 7, 7772], [1994, 1, 3, 1, 10142], [1994, 1, 4, 2, 11248], [1994, 1, 5, 3, 11053], [1994, 1, 6, 4, 11406], [1994, 1, 7, 5, 11251], [1994, 1, 8, 6, 8653], [1994, 1, 9, 7, 7910], [1994, 1, 10, 1, 10498]]


### Analyzing the total number of births that occurred in each month

In [6]:
def month_births(birth_list):
    """Sum births per month.

    Args:
        birth_list: Rows where index 1 holds the month and index 4 holds
            the number of births.

    Returns:
        A dict mapping each month value to the sum of births over all rows
        with that month, keyed in order of first appearance.
    """
    monthly_totals = {}

    for row in birth_list:
        month, births = row[1], row[4]
        # First time this month is seen: start its running total.
        if month not in monthly_totals:
            monthly_totals[month] = births
            continue
        monthly_totals[month] += births

    return monthly_totals

In [7]:
# Total births for each month number, summed over every row in the dataset.
cdc_month_births = month_births(cdc_list)
print(cdc_month_births)

{1: 3232517, 2: 3018140, 3: 3322069, 4: 3185314, 5: 3350907, 6: 3296530, 7: 3498783, 8: 3525858, 9: 3439698, 10: 3378814, 11: 3171647, 12: 3301860}


### Analyzing the total number of births that occurred on each day of the week

In [8]:
def dow_births(birth_list):
    """Sum births per day of the week.

    Args:
        birth_list: Rows where index 3 holds the day of the week and
            index 4 holds the number of births.

    Returns:
        A dict mapping each day-of-week value to the sum of births over all
        rows with that day, keyed in order of first appearance.
    """
    weekday_totals = {}

    for row in birth_list:
        weekday, births = row[3], row[4]
        try:
            weekday_totals[weekday] += births
        except KeyError:
            # First row for this weekday: start its running total.
            weekday_totals[weekday] = births

    return weekday_totals

In [9]:
# Total births for each day-of-week number, summed over every row in the dataset.
cdc_day_births = dow_births(cdc_list)
print(cdc_day_births)

{1: 5789166, 2: 6446196, 3: 6322855, 4: 6288429, 5: 6233657, 6: 4562111, 7: 4079723}


### Generalizing the Function

In [10]:
def calc_counts(data, column, value_column=4):
    """Sum one column of the rows, grouped by the values of another column.

    Args:
        data: Iterable of indexable rows.
        column: Index of the column whose distinct values become the keys.
        value_column: Index of the column that is summed. Defaults to 4,
            the births column, so existing two-argument calls are unchanged.

    Returns:
        A dict mapping each distinct ``row[column]`` value to the sum of
        ``row[value_column]`` over the rows that share it.
    """
    totals = {}

    for item in data:
        key = item[column]
        # dict.get supplies 0 the first time a key is seen.
        totals[key] = totals.get(key, 0) + item[value_column]

    return totals

In [11]:
# Column indices follow the CSV header:
# 0 = year, 1 = month, 2 = date_of_month, 3 = day_of_week.
cdc_year_births = calc_counts(cdc_list, 0)
cdc_month_births = calc_counts(cdc_list, 1)
cdc_dom_births = calc_counts(cdc_list, 2)
cdc_dow_births = calc_counts(cdc_list, 3)

In [12]:
# Display total births per year.
cdc_year_births

{1994: 3952767,
 1995: 3899589,
 1996: 3891494,
 1997: 3880894,
 1998: 3941553,
 1999: 3959417,
 2000: 4058814,
 2001: 4025933,
 2002: 4021726,
 2003: 4089950}

In [13]:
# Display total births per month.
cdc_month_births

{1: 3232517,
 2: 3018140,
 3: 3322069,
 4: 3185314,
 5: 3350907,
 6: 3296530,
 7: 3498783,
 8: 3525858,
 9: 3439698,
 10: 3378814,
 11: 3171647,
 12: 3301860}

In [14]:
# Display total births per date of the month.
cdc_dom_births

{1: 1276557,
 2: 1288739,
 3: 1304499,
 4: 1288154,
 5: 1299953,
 6: 1304474,
 7: 1310459,
 8: 1312297,
 9: 1303292,
 10: 1320764,
 11: 1314361,
 12: 1318437,
 13: 1277684,
 14: 1320153,
 15: 1319171,
 16: 1315192,
 17: 1324953,
 18: 1326855,
 19: 1318727,
 20: 1324821,
 21: 1322897,
 22: 1317381,
 23: 1293290,
 24: 1288083,
 25: 1272116,
 26: 1284796,
 27: 1294395,
 28: 1307685,
 29: 1223161,
 30: 1202095,
 31: 746696}

In [15]:
# Display total births per day of the week.
cdc_dow_births

{1: 5789166,
 2: 6446196,
 3: 6322855,
 4: 6288429,
 5: 6233657,
 6: 4562111,
 7: 4079723}

### Calculating Min and Max of Dictionaries

In [16]:
def min_max(dictionary):
    """Return the smallest and largest values stored in a dict.

    Args:
        dictionary: Mapping whose values can be compared with each other.

    Returns:
        A two-element list ``[minimum, maximum]`` of the dict's values.
    """
    values = dictionary.values()
    return [min(values), max(values)]

In [17]:
# Smallest and largest day-of-week totals, as [minimum, maximum].
min_max_dow = min_max(cdc_dow_births)
print(min_max_dow)

[4079723, 6446196]


### Calculating Differences Across Years

In [68]:
def year_differences(year_birth_dict):
    """Print the change in total births between consecutive years.

    Years are processed in ascending order. The first year has no
    predecessor, so a "not available" line is printed for it. Every later
    year prints whether births grew, decreased, or did not change relative
    to the year before it.

    Args:
        year_birth_dict: Mapping of year to total births for that year.

    Returns:
        None. All results are written to standard output.
    """
    # None, not 0, marks "no previous year yet", so no real key can be
    # mistaken for the sentinel.
    previous_year = None
    previous_total = None

    for current_year, current_total in sorted(year_birth_dict.items()):
        if previous_year is None:
            print("Birth change from previous year to " + str(current_year) +
                  " not available.")
        elif current_total > previous_total:
            print("From " + str(previous_year) + " to " + str(current_year) +
                  " births grew by " + str(current_total - previous_total))
        elif current_total < previous_total:
            print("From " + str(previous_year) + " to " + str(current_year) +
                  " births decreased by " + str(previous_total - current_total))
        else:
            # Equal totals used to print nothing, silently dropping the year.
            print("From " + str(previous_year) + " to " + str(current_year) +
                  " births did not change")
        previous_year, previous_total = current_year, current_total

In [69]:
# Print the year-over-year change in total births.
year_differences(cdc_year_births)

Birth change from previous year to 1994 not available.
From 1994 to 1995 births decreased by 53178
From 1995 to 1996 births decreased by 8095
From 1996 to 1997 births decreased by 10600
From 1997 to 1998 births grew by 60659
From 1998 to 1999 births grew by 17864
From 1999 to 2000 births grew by 99397
From 2000 to 2001 births decreased by 32881
From 2001 to 2002 births decreased by 4207
From 2002 to 2003 births grew by 68224


#### Generalizing Difference Calculation

In [66]:
def cross_year_differences(data, col=0):
    """Print year-over-year birth changes, grouped by the values of one column.

    Rows are grouped by ``row[col]``. Within each group, births (``row[4]``)
    are summed per year (``row[0]``) and each year's total is compared with
    the previous year's total in that group.

    Output, in order:
      1. The list of distinct ``row[col]`` values in order of first appearance.
      2. For each distinct value in sorted order, a ``For <value>:`` line
         followed by one line per year.

    Args:
        data: Iterable of rows indexable at 0 (year), ``col`` and 4 (births).
        col: Index of the grouping column. With the default of 0 every group
            holds a single year, so the previous-year state is carried from
            one group to the next rather than being reset.

    Returns:
        None. All results are written to standard output.
    """
    # One pass over the data: totals[parameter][year] -> summed births.
    # Dict insertion order keeps the parameters in order of first appearance.
    totals = {}
    for item in data:
        per_year = totals.setdefault(item[col], {})
        year = item[0]
        per_year[year] = per_year.get(year, 0) + item[4]

    print(list(totals))

    # None marks "no previous year yet" for the comparison below.
    previous_year = None
    previous_total = None

    for parameter in sorted(totals):
        print("For " + str(parameter) + ":")
        # Each group starts fresh, except when grouping by year itself, where
        # the comparison has to span groups.
        if col != 0:
            previous_year = None
            previous_total = None

        for current_year, current_total in sorted(totals[parameter].items()):
            if previous_year is None:
                print("Birth change from previous year to " + str(current_year) +
                      " not available.")
            elif current_total > previous_total:
                print("From " + str(previous_year) + " to " + str(current_year) +
                      " births grew by " + str(current_total - previous_total))
            elif current_total < previous_total:
                print("From " + str(previous_year) + " to " + str(current_year) +
                      " births decreased by " + str(previous_total - current_total))
            else:
                # Equal totals used to print nothing, silently dropping the year.
                print("From " + str(previous_year) + " to " + str(current_year) +
                      " births did not change")
            previous_year, previous_total = current_year, current_total

        

In [57]:
# Year-over-year change in births for each month (column 1).
cross_year_differences(cdc_list,1)

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
For 1:
Birth change from previous year to 1994 not available.
From 1994 to 1995 births decreased by 4692
From 1995 to 1996 births decreased by 1730
From 1996 to 1997 births grew by 2928
From 1997 to 1998 births grew by 2129
From 1998 to 1999 births decreased by 158
From 1999 to 2000 births grew by 10926
From 2000 to 2001 births grew by 5090
From 2001 to 2002 births decreased by 4524
From 2002 to 2003 births decreased by 871
For 2:
Birth change from previous year to 1994 not available.
From 1994 to 1995 births decreased by 6233
From 1995 to 1996 births grew by 6669
From 1996 to 1997 births decreased by 10222
From 1997 to 1998 births grew by 7170
From 1998 to 1999 births decreased by 1143
From 1999 to 2000 births grew by 19809
From 2000 to 2001 births decreased by 13843
From 2001 to 2002 births grew by 443
From 2002 to 2003 births grew by 3271
For 3:
Birth change from previous year to 1994 not available.
From 1994 to 1995 births decreased by 11233


In [58]:
# Year-over-year change in births for each date of the month (column 2).
cross_year_differences(cdc_list,2)

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
For 1:
Birth change from previous year to 1994 not available.
From 1994 to 1995 births decreased by 4520
From 1995 to 1996 births decreased by 2389
From 1996 to 1997 births decreased by 4745
From 1997 to 1998 births grew by 3585
From 1998 to 1999 births grew by 6043
From 1999 to 2000 births grew by 4070
From 2000 to 2001 births decreased by 8062
From 2001 to 2002 births grew by 2226
From 2002 to 2003 births decreased by 3857
For 2:
Birth change from previous year to 1994 not available.
From 1994 to 1995 births decreased by 4872
From 1995 to 1996 births grew by 2576
From 1996 to 1997 births decreased by 1829
From 1997 to 1998 births grew by 7479
From 1998 to 1999 births grew by 2029
From 1999 to 2000 births decreased by 6624
From 2000 to 2001 births grew by 5204
From 2001 to 2002 births decreased by 5048
From 2002 to 2003 births grew by 3863
For 3:
Birth change from previo

In [59]:
# Year-over-year change in births for each day of the week (column 3).
cross_year_differences(cdc_list,3)

[6, 7, 1, 2, 3, 4, 5]
For 1:
Birth change from previous year to 1994 not available.
From 1994 to 1995 births decreased by 11276
From 1995 to 1996 births grew by 11947
From 1996 to 1997 births decreased by 4561
From 1997 to 1998 births grew by 7040
From 1998 to 1999 births grew by 1136
From 1999 to 2000 births grew by 12354
From 2000 to 2001 births grew by 7874
From 2001 to 2002 births grew by 2368
From 2002 to 2003 births grew by 14587
For 2:
Birth change from previous year to 1994 not available.
From 1994 to 1995 births decreased by 12633
From 1995 to 1996 births grew by 12662
From 1996 to 1997 births decreased by 1452
From 1997 to 1998 births grew by 15052
From 1998 to 1999 births grew by 1597
From 1999 to 2000 births grew by 7735
From 2000 to 2001 births decreased by 3967
From 2001 to 2002 births grew by 16996
From 2002 to 2003 births grew by 9243
For 3:
Birth change from previous year to 1994 not available.
From 1994 to 1995 births decreased by 5580
From 1995 to 1996 births decreas

In [67]:
# Year-over-year change in total births (column 0 groups by year itself).
cross_year_differences(cdc_list,0)

[1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003]
For 1994:
Birth change from previous year to 1994 not available.
For 1995:
From 1994 to 1995 births decreased by 53178
For 1996:
From 1995 to 1996 births decreased by 8095
For 1997:
From 1996 to 1997 births decreased by 10600
For 1998:
From 1997 to 1998 births grew by 60659
For 1999:
From 1998 to 1999 births grew by 17864
For 2000:
From 1999 to 2000 births grew by 99397
For 2001:
From 2000 to 2001 births decreased by 32881
For 2002:
From 2001 to 2002 births decreased by 4207
For 2003:
From 2002 to 2003 births grew by 68224
