# Introduction to Dataset

In [1]:
# Read the whole CSV as one string; the context manager closes the file
# handle as soon as the read finishes instead of leaving it open.
with open("US_births_1994-2003_CDC_NCHS.csv", 'r') as f:
    data = f.read()
# One string per line of the file; the first element is the header row.
split_data = data.split('\n')
print(split_data[:10])

['year,month,date_of_month,day_of_week,births', '1994,1,1,6,8096', '1994,1,2,7,7772', '1994,1,3,1,10142', '1994,1,4,2,11248', '1994,1,5,3,11053', '1994,1,6,4,11406', '1994,1,7,5,11251', '1994,1,8,6,8653', '1994,1,9,7,7910']


# Converting Data To a List of Lists

In [2]:
def read_csv(csv_file):
    """Load a comma-separated file of integers into a list of integer rows.

    The first line is treated as a header and dropped. Every remaining
    non-blank line is split on ',' and each field is converted with int().

    Args:
        csv_file: Path of the file to read.

    Returns:
        A list with one list of ints per data line, in file order.

    Raises:
        ValueError: If a field on a non-blank data line is not an integer.
    """
    # The context manager guarantees the handle is closed, even on error.
    with open(csv_file, 'r') as file:
        lines = file.read().split('\n')

    final_list = []
    for row in lines[1:]:
        # A file ending in '\n' yields an empty last element after split();
        # skip blank lines instead of failing on int('').
        if not row.strip():
            continue
        final_list.append([int(column) for column in row.split(',')])
    return final_list

# Parse the CDC births file into integer rows and preview the first ten.
cdc_list = read_csv("US_births_1994-2003_CDC_NCHS.csv")
print(cdc_list[:10])

[[1994, 1, 1, 6, 8096], [1994, 1, 2, 7, 7772], [1994, 1, 3, 1, 10142], [1994, 1, 4, 2, 11248], [1994, 1, 5, 3, 11053], [1994, 1, 6, 4, 11406], [1994, 1, 7, 5, 11251], [1994, 1, 8, 6, 8653], [1994, 1, 9, 7, 7910], [1994, 1, 10, 1, 10498]]


# Gathering Data for Specific Columns

In [3]:
def month_births(data_list):
    """Total the births recorded for each month.

    Each row of ``data_list`` is indexed positionally: element 1 is used as
    the month key and element 4 as the number of births to add.

    Returns a dict mapping each month key to its summed births.
    """
    totals = {}
    for record in data_list:
        month_key, count = record[1], record[4]
        try:
            totals[month_key] += count
        except KeyError:
            # First time this month is seen: start its running total.
            totals[month_key] = count
    return totals

# Births summed per month across every row of the dataset.
cdc_month_births = month_births(cdc_list)
print(cdc_month_births)

{1: 3232517, 2: 3018140, 3: 3322069, 4: 3185314, 5: 3350907, 6: 3296530, 7: 3498783, 8: 3525858, 9: 3439698, 10: 3378814, 11: 3171647, 12: 3301860}


In [4]:
def dow_births(data_list):
    """Total the births recorded for each day of the week.

    Each row of ``data_list`` is indexed positionally: element 3 is used as
    the day-of-week key and element 4 as the number of births to add.

    Returns a dict mapping each day-of-week key to its summed births.
    """
    totals = {}
    for record in data_list:
        weekday, count = record[3], record[4]
        try:
            totals[weekday] += count
        except KeyError:
            # First time this weekday is seen: start its running total.
            totals[weekday] = count
    return totals

# Births summed per day of the week across every row of the dataset.
cdc_day_births = dow_births(cdc_list)
print(cdc_day_births)
    

{1: 5789166, 2: 6446196, 3: 6322855, 4: 6288429, 5: 6233657, 6: 4562111, 7: 4079723}


# Reusable Function for each Column

In [5]:
def calc_counts(data, column, value_column=4):
    """Sum one column of ``data`` grouped by the values of another column.

    Args:
        data: Iterable of rows that can be indexed by position.
        column: Index of the column whose values become the dict keys.
        value_column: Index of the column to sum for each key. Defaults to
            4, the births column of the CDC file, so existing two-argument
            calls behave exactly as before.

    Returns:
        A dict mapping each distinct ``row[column]`` to the sum of
        ``row[value_column]`` over the rows that share it.
    """
    births_per_column = {}
    for day in data:
        length_of_time = day[column]
        births = day[value_column]
        try:
            births_per_column[length_of_time] += births
        except KeyError:
            # First row for this key: start its running total.
            births_per_column[length_of_time] = births
    return births_per_column

# Total births grouped by each column of the file in turn. Per the CSV
# header, index 0 is year, 1 is month, 2 is date_of_month, 3 is day_of_week.
cdc_year_births = calc_counts(cdc_list, 0)
print(cdc_year_births)
# Rebinds cdc_month_births, which an earlier cell set via month_births().
cdc_month_births = calc_counts(cdc_list, 1)
print(cdc_month_births)
cdc_dom_births = calc_counts(cdc_list, 2)
print(cdc_dom_births)
cdc_dow_births = calc_counts(cdc_list, 3)
print(cdc_dow_births)

{2000: 4058814, 2001: 4025933, 2002: 4021726, 2003: 4089950, 1994: 3952767, 1995: 3899589, 1996: 3891494, 1997: 3880894, 1998: 3941553, 1999: 3959417}
{1: 3232517, 2: 3018140, 3: 3322069, 4: 3185314, 5: 3350907, 6: 3296530, 7: 3498783, 8: 3525858, 9: 3439698, 10: 3378814, 11: 3171647, 12: 3301860}
{1: 1276557, 2: 1288739, 3: 1304499, 4: 1288154, 5: 1299953, 6: 1304474, 7: 1310459, 8: 1312297, 9: 1303292, 10: 1320764, 11: 1314361, 12: 1318437, 13: 1277684, 14: 1320153, 15: 1319171, 16: 1315192, 17: 1324953, 18: 1326855, 19: 1318727, 20: 1324821, 21: 1322897, 22: 1317381, 23: 1293290, 24: 1288083, 25: 1272116, 26: 1284796, 27: 1294395, 28: 1307685, 29: 1223161, 30: 1202095, 31: 746696}
{1: 5789166, 2: 6446196, 3: 6322855, 4: 6288429, 5: 6233657, 6: 4562111, 7: 4079723}


# Min and Max Values for Each Dictionary

In [13]:
def min_and_max(dictionary):
    """Return the smallest and largest values stored in ``dictionary``.

    The result is a ``(min_value, max_value)`` tuple; keys are ignored.
    """
    # Largest is evaluated first, matching the original evaluation order.
    largest = max(dictionary.values())
    smallest = min(dictionary.values())
    return smallest, largest
# (min, max) of the total births per year, month, day of month and day of week.
print(min_and_max(cdc_year_births))
print(min_and_max(cdc_month_births))
print(min_and_max(cdc_dom_births))
print(min_and_max(cdc_dow_births))

(3880894, 4089950)
(3018140, 3525858)
(746696, 1326855)
(4079723, 6446196)


# Changes in Births Compared to the Previous Year

In [34]:
def Column_per_Year_Comp(data, column, value):
    """Return the year-over-year change in births for one column value.

    Only rows where ``row[column] == value`` are counted. Their births
    (``row[4]``) are summed per year (``row[0]``), and each year's total is
    then compared with the total of the year before it.

    Args:
        data: Iterable of rows that can be indexed by position.
        column: Index of the column to filter on.
        value: Value that ``row[column]`` must equal for a row to count.

    Returns:
        A dict keyed by year. A year whose preceding year has no matching
        rows (such as the first year in the data) maps to its own total,
        which serves as the baseline. Every other year maps to its total
        minus the preceding year's total.
    """
    yearly_values = {}

    # First the total births for the selected value are separated by year.
    for row in data:
        if row[column] == value:
            year = row[0]
            births = row[4]
            if year in yearly_values:
                yearly_values[year] += births
            else:
                yearly_values[year] = births

    yearly_change = {}

    # Then each year is compared with the one before it. Checking for the
    # previous year's presence, rather than a fixed first year, keeps this
    # working for data that starts in any year or has gaps between years.
    for year, births in yearly_values.items():
        if year - 1 in yearly_values:
            yearly_change[year] = births - yearly_values[year - 1]
        else:
            yearly_change[year] = births
    return yearly_change

# Yearly change in births for day_of_week == 7 (column 3) and for
# month == 6 (column 1). Neither result is assigned or printed.
# NOTE(review): the single dict shown below is presumably the value of the
# last expression only - confirm.
Column_per_Year_Comp(cdc_list, 3, 7)
Column_per_Year_Comp(cdc_list, 1, 6)

{1994: 329737,
 1995: 68,
 1996: -11280,
 1997: 3342,
 1998: 5224,
 1999: 5110,
 2000: 9005,
 2001: -10121,
 2002: -3497,
 2003: 9837}