# U.S. Medical Insurance Costs

### Import the data

In [10]:
import csv

# newline='' lets the csv module do its own line-ending handling, which is
# what the csv documentation asks for when a file object is given to a reader.
with open('insurance.csv', 'r', newline='') as insurance_csv:
    csv_data = csv.DictReader(insurance_csv)
    # Materialise every row while the file is still open; DictReader yields
    # one dict per CSV row, keyed by the header line, with string values.
    data = list(csv_data)
# data is now a list of dicts (one per CSV row)

### Determine Average Age

In [2]:
def average_age(data):
    """Print the mean age of the people in ``data``, rounded to 2 decimals.

    Args:
        data: A sequence of row dicts (it must support ``len``); each row's
            ``'age'`` value must be convertible with ``int``.

    When ``data`` is empty a notice is printed instead of dividing by zero.
    """
    if not data:
        print('No records available to compute an average age.')
        return

    ages_total = sum(int(row['age']) for row in data)
    average = round(ages_total / len(data), 2)
    print(f'The average age of those recorded is {average} years.')
        
# Run the average-age report on the rows loaded from insurance.csv.
average_age(data)


The average age of those recorded is 39.21 years.


### Region With the Most Insured People

In [3]:
def majority_area(data):
    """Print the region that appears most often in ``data``.

    Args:
        data: An iterable of row dicts, each with a ``'region'`` key.

    The region name is title-cased for display. If several regions share the
    highest count, the one encountered first in ``data`` is reported (dicts
    keep insertion order on Python 3.7+). When ``data`` holds no rows a
    notice is printed, because ``max`` raises ValueError on an empty mapping.
    """
    # Tally how many rows fall in each region.
    region_counts = {}
    for row in data:
        region = row['region']
        region_counts[region] = region_counts.get(region, 0) + 1

    if not region_counts:
        print('No records available to determine the most common region.')
        return

    top_region = max(region_counts, key=region_counts.get).title()
    print(f'The area with the most insured people on record is the {top_region}.')
    
# Report which region has the most rows in the loaded data.
majority_area(data)

The area with the most insured people on record is the Southeast.


### Difference in Average Cost Between Smokers and Non-Smokers

In [4]:
def diff_smoking(data):
    """Print how much less the average non-smoker is charged than the average smoker.

    Args:
        data: An iterable of row dicts with a ``'smoker'`` key and a
            ``'charges'`` key whose value is convertible with ``float``.

    A row counts as a smoker only when ``'smoker'`` is exactly ``'yes'``;
    every other value counts as a non-smoker. When either group is empty a
    notice is printed instead of dividing by zero.

    NOTE: a later cell in this file defines another function with the same
    name (a BMI comparison); once that definition runs, this one is no
    longer reachable by name.
    """
    smokers_total = 0
    smokers_costs = 0
    non_smokers_total = 0
    non_smokers_costs = 0
    for person in data:
        charges = float(person['charges'])
        if person['smoker'] == 'yes':
            smokers_total += 1
            smokers_costs += charges
        else:
            non_smokers_total += 1
            non_smokers_costs += charges

    if not smokers_total or not non_smokers_total:
        print('Cannot compare costs: need at least one smoker and one non-smoker.')
        return

    diff = smokers_costs / smokers_total - non_smokers_costs / non_smokers_total
    # Always show two decimals so the amount reads as currency ($100.50, not $100.5).
    print(f'The average non-smoker costs ${diff:.2f} less than the average smoker.')
        
        
# Report the smoker vs. non-smoker gap in average charges for the loaded data.
diff_smoking(data)


The average non-smoker costs $23615.96 less than the average smoker.


### Average age for those with kids

In [5]:
def avg_age_with_kids(data):
    """Print the mean age of people who have children, rounded to 1 decimal.

    Args:
        data: An iterable of row dicts with ``'children'`` and ``'age'``
            keys; ``'age'`` must be convertible with ``int``.

    A row is counted when its ``'children'`` value is anything other than
    the string ``'0'``. When no row qualifies a notice is printed instead of
    dividing by zero.
    """
    parent_ages = [
        int(person['age']) for person in data if person['children'] != '0'
    ]
    if not parent_ages:
        print('No records of persons with children.')
        return

    mean_age = round(sum(parent_ages) / len(parent_ages), 1)
    print(f'The average age of persons with children is {mean_age} years.')
        
        
# Report the average age of people with children in the loaded data.
avg_age_with_kids(data)


The average age of persons with children is 39.8 years.


### How Costs Increase From One Five-Year Age Bracket to the Next

In [6]:
# highest_age = 0
# for row in data:
#     if int(row['age']) > highest_age:
#         highest_age = int(row['age'])

# print(highest_age)
# 64

In [7]:
def five_yr_increases(data):
    """Print the average charges for each five-year age bracket from 25 to 64.

    Args:
        data: An iterable of row dicts; ``'age'`` must be convertible with
            ``int`` and ``'charges'`` with ``float``.

    The brackets are 25-29, 30-34, ..., 60-64. Rows whose age falls outside
    25-64 are not counted in any bracket. A bracket with no rows prints a
    notice instead of dividing by zero.
    """
    first_age, end_age, width = 25, 65, 5
    lower_bounds = list(range(first_age, end_age, width))

    # Single pass over the data: accumulate total charges and row count
    # per bracket instead of rescanning every row once per bracket.
    totals = [0] * len(lower_bounds)
    counts = [0] * len(lower_bounds)
    for person in data:
        age = int(person['age'])
        if first_age <= age < end_age:
            bracket = (age - first_age) // width
            totals[bracket] += float(person['charges'])
            counts[bracket] += 1

    for lower, total_costs, count in zip(lower_bounds, totals, counts):
        upper = lower + width - 1
        if not count:
            print(f'No records for those aged {lower} to {upper}.')
            continue
        average = total_costs / count
        # Two fixed decimals so the amount reads as currency.
        print(f'The average costs for those aged {lower} to {upper} was ${average:.2f}.')
        
# Print the average charges per five-year age bracket for the loaded data.
five_yr_increases(data)

The average costs for those aged 25 to 29 was $9524.78.
The average costs for those aged 30 to 34 was $11223.89.
The average costs for those aged 35 to 39 was $12282.51.
The average costs for those aged 40 to 44 was $13922.74.
The average costs for those aged 45 to 49 was $14845.89.
The average costs for those aged 50 to 54 was $16869.02.
The average costs for those aged 55 to 59 was $16077.64.
The average costs for those aged 60 to 64 was $21248.02.


### Average BMI of Smokers vs. Non-Smokers

In [8]:
def diff_smoking(data):
    """Print the average BMI of non-smokers and of smokers, each rounded to 2 decimals.

    Args:
        data: An iterable of row dicts with a ``'smoker'`` key and a
            ``'bmi'`` key whose value is convertible with ``float``.

    A row counts as a smoker only when ``'smoker'`` is exactly ``'yes'``;
    every other value counts as a non-smoker. When either group is empty a
    notice is printed instead of dividing by zero.

    NOTE: this reuses the name of the cost-comparison function defined
    earlier in this file, so once this definition runs the earlier one is
    no longer reachable by name.
    """
    smokers_total = 0
    smokers_bmi = 0
    non_smokers_total = 0
    non_smokers_bmi = 0
    for person in data:
        bmi = float(person['bmi'])
        if person['smoker'] == 'yes':
            smokers_total += 1
            smokers_bmi += bmi
        else:
            non_smokers_total += 1
            non_smokers_bmi += bmi

    if not smokers_total or not non_smokers_total:
        print('Cannot compare BMI: need at least one smoker and one non-smoker.')
        return

    smokers_avg = round(smokers_bmi / smokers_total, 2)
    non_smokers_avg = round(non_smokers_bmi / non_smokers_total, 2)

    print(f'The BMI of the average non-smoker is {non_smokers_avg} while that of the average smoker is {smokers_avg}.')
        
        
# As ordered in this file, diff_smoking now refers to the BMI comparison
# defined directly above, not the earlier cost comparison of the same name.
diff_smoking(data)


The the BMI of the average non-smoker is 30.65 while that of the average smoker is 30.71.


### Percentage of Smokers That Have Kids

In [9]:
def smokers_with_kids(data):
    """Print the percentage of smokers who have children, rounded to 2 decimals.

    Args:
        data: An iterable of row dicts with ``'smoker'`` and ``'children'``
            keys.

    A row is a smoker when ``'smoker'`` is exactly ``'yes'``, and has
    children when ``'children'`` is anything other than the string ``'0'``.
    When there are no smokers a notice is printed instead of dividing by
    zero.
    """
    smokers = 0
    smoking_parents = 0
    for person in data:
        if person['smoker'] != 'yes':
            continue
        smokers += 1
        if person['children'] != '0':
            smoking_parents += 1

    if not smokers:
        print('No smokers on record.')
        return

    # Scale to a percentage before rounding: rounding the ratio first and
    # then multiplying by 100 can reintroduce float noise such as
    # 7.000000000000001.
    percent = round(smoking_parents * 100 / smokers, 2)
    print(f'{percent}% of smokers have kids.')

# Report the share of smokers in the loaded data who have children.
smokers_with_kids(data)

58.03% of smokers have kids.
