# US Medical Insurance Costs

In [185]:
import csv

### Creating lists with patient data

In [186]:
ages = []
sexes = []
bmis = []
num_children = []
smoker_status = []
regions = []
insurance_charges = []

In [187]:
def load_data(file, lst, header):
    with open(file, newline = '') as insurance:
        reader = csv.DictReader(insurance)
        for row in reader:
            lst.append(row[header])

In [188]:
load_data("insurance.csv", ages, "age")
load_data("insurance.csv", sexes, "sex")
load_data("insurance.csv", bmis, "bmi")
load_data("insurance.csv", num_children, "children")
load_data("insurance.csv", smoker_status, "smoker")
load_data("insurance.csv", regions, "region")
load_data("insurance.csv", insurance_charges, "charges")

### Defining functions which calculate averages

##### Average function which takes a list of numerical values as a parameter (age, smi, children, insurance charges)

In [189]:
def average(lst):
    summ = 0
    count = 0
    for data in lst:
        summ += float(data)
        count += 1
    
    return summ / count

#Average insurance cost for all patients
average(insurance_charges)

13270.422265141257

##### Average function which takes a list of dictionaries as a parameter

In [190]:
def average_dict(lst, flg):
    av_lst = []
    for patient in lst:
        for i, j in patient.items():
            if flg == True:
                av_lst.append(float(i))
            else:
                av_lst.append(float(j))
    
    return average(av_lst)

### Average cost and BMI by regions 

In [191]:
# 1 - Class Region with region, cost and bmi attributes

class Region:
    def __init__(self, region, bmi, charges):
        self.region = region
        self.bmi = bmi
        self.charges = charges

In [192]:
# 2 - Four lists of dicts with BMI:COST as KEY:VALUE

northeast = []
northwest = []
southeast = []
southwest = []

for region, bmi, charge in zip(regions, bmis, insurance_charges):
    patient = Region(region, bmi, charge)
    region_dict = {}
    region_dict[bmi] = charge
    
    if patient.region == "northeast":
        northeast.append(region_dict)
    elif patient.region == "northwest":
        northwest.append(region_dict)
    elif patient.region == "southeast":
        southeast.append(region_dict)
    else:
        southwest.append(region_dict)

In [193]:
# 3 - Average cost by region
ne_av_cost = round(average_dict(northeast, 0), 2)
nw_av_cost = round(average_dict(northwest, 0), 2)
se_av_cost = round(average_dict(southeast, 0), 2)
sw_av_cost = round(average_dict(southwest, 0), 2)

print("A Northeast resident pays $ {} in average.".format(ne_av_cost))
print("A Northwest resident pays $ {} in average.".format(nw_av_cost))
print("A Southeast resident pays $ {} in average.".format(se_av_cost))
print("A Southwest resident pays $ {} in average.".format(sw_av_cost))

A Northeast resident pays $ 13406.38 in average.
A Northwest resident pays $ 12417.58 in average.
A Southeast resident pays $ 14735.41 in average.
A Southwest resident pays $ 12346.94 in average.


In [194]:
# 4 - Average BMI by region
ne_av_bmi = round(average_dict(northeast, 1), 2)
nw_av_bmi = round(average_dict(northwest, 1), 2)
se_av_bmi = round(average_dict(southeast, 1), 2)
sw_av_bmi = round(average_dict(southwest, 1), 2)

print("A Northeast resident's average BMI is {}.".format(ne_av_bmi))
print("A Northwest resident's average BMI is {}.".format(nw_av_bmi))
print("A Southeast resident's average BMI is {}.".format(se_av_bmi))
print("A Southwest resident's average BMI is {}.".format(sw_av_bmi))

A Northeast resident's average BMI is 29.17.
A Northwest resident's average BMI is 29.2.
A Southeast resident's average BMI is 33.36.
A Southwest resident's average BMI is 30.6.


### Average age of people with and without children

In [195]:
# 1 - List of dicts with AGE:CHILDREN as KEY:VALUE

age_children_general = []
for age, child in zip(ages, num_children):
    age_children_dict = {}
    age_children_dict[age] = child
    age_children_general.append(age_children_dict)
    
print(age_children_general)

[{'19': '0'}, {'18': '1'}, {'28': '3'}, {'33': '0'}, {'32': '0'}, {'31': '0'}, {'46': '1'}, {'37': '3'}, {'37': '2'}, {'60': '0'}, {'25': '0'}, {'62': '0'}, {'23': '0'}, {'56': '0'}, {'27': '0'}, {'19': '1'}, {'52': '1'}, {'23': '0'}, {'56': '0'}, {'30': '0'}, {'60': '0'}, {'30': '1'}, {'18': '0'}, {'34': '1'}, {'37': '2'}, {'59': '3'}, {'63': '0'}, {'55': '2'}, {'23': '1'}, {'31': '2'}, {'22': '0'}, {'18': '0'}, {'19': '5'}, {'63': '0'}, {'28': '1'}, {'19': '0'}, {'62': '3'}, {'26': '0'}, {'35': '1'}, {'60': '0'}, {'24': '0'}, {'31': '2'}, {'41': '1'}, {'37': '2'}, {'38': '1'}, {'55': '0'}, {'18': '2'}, {'28': '0'}, {'60': '0'}, {'36': '1'}, {'18': '0'}, {'21': '2'}, {'48': '1'}, {'36': '0'}, {'40': '3'}, {'58': '2'}, {'58': '2'}, {'18': '2'}, {'53': '1'}, {'34': '2'}, {'43': '3'}, {'25': '4'}, {'64': '1'}, {'28': '1'}, {'20': '0'}, {'19': '0'}, {'61': '2'}, {'40': '1'}, {'40': '0'}, {'28': '3'}, {'27': '0'}, {'31': '5'}, {'53': '3'}, {'58': '1'}, {'44': '2'}, {'57': '0'}, {'29': '1'}

In [196]:
# 2 - Average age of people with all type of VALUES

age_children_no = []
age_children_1 = []
age_children_2 = []
age_children_3 = []
age_children_4 = []
age_children_5 = []

for patient in age_children_general:
    for age, children in patient.items():
        if children == '0':
            age_children_no.append(patient)
        elif children == '1':
            age_children_1.append(patient)
        elif children == '2':
            age_children_2.append(patient)
        elif children == '3':
            age_children_3.append(patient)
        elif children == '4':
            age_children_4.append(patient)
        else:
            age_children_5.append(patient)
            
print(age_children_5)

[{'19': '5'}, {'31': '5'}, {'20': '5'}, {'25': '5'}, {'45': '5'}, {'52': '5'}, {'49': '5'}, {'33': '5'}, {'33': '5'}, {'46': '5'}, {'39': '5'}, {'39': '5'}, {'20': '5'}, {'39': '5'}, {'41': '5'}, {'39': '5'}, {'28': '5'}, {'43': '5'}]


In [197]:
# 3 - Get the average age of all age-children age

average_children_no = average_dict(age_children_no, True)
average_children_yes = (average_dict(age_children_1, True) + average_dict(age_children_2, True) + average_dict(age_children_3, True) + average_dict(age_children_4, True) + average_dict(age_children_5, True)) / 5

print("The average age of patients with children is " + str(round(average_children_yes, 1)) + " years")
print("The average age of patients without children is " + str(round(average_children_no, 1)) + " years")

The average age of patients with children is 39.0 years
The average age of patients without children is 38.4 years


### How smoking affects the cost?

In [198]:
# 1 - Create a lost of dictionaries with SMOKER:CHARHES as KEY:VALUE
# 2 - Create two lists for smokers and non-smokers charges

smokers_lst = []
non_smokers_lst = []

for smoker, charges in zip(smoker_status, insurance_charges):
    smoking_dict = dict()
    smoking_dict.update({smoker:charges})
    
    if smoker == "yes":
        smokers_lst.append(float(smoking_dict[smoker]))
    else:
        non_smokers_lst.append(float(smoking_dict[smoker]))
    
print(smokers_lst)
print()
print(non_smokers_lst)

[16884.924, 27808.7251, 39611.7577, 36837.467, 37701.8768, 38711.0, 35585.576, 51194.55914, 39774.2763, 48173.361, 38709.176, 23568.272, 37742.5757, 47496.49445, 34303.1672, 23244.7902, 14711.7438, 17663.1442, 16577.7795, 37165.1638, 39836.519, 21098.55405, 43578.9394, 30184.9367, 47291.055, 22412.6485, 15820.699, 30942.1918, 17560.37975, 47055.5321, 19107.7796, 39556.4945, 17081.08, 32734.1863, 18972.495, 20745.9891, 40720.55105, 19964.7463, 21223.6758, 15518.18025, 36950.2567, 21348.706, 36149.4835, 48824.45, 43753.33705, 37133.8982, 20984.0936, 34779.615, 19515.5416, 19444.2658, 17352.6803, 38511.6283, 29523.1656, 12829.4551, 47305.305, 44260.7499, 41097.16175, 43921.1837, 33750.2918, 17085.2676, 24869.8368, 36219.40545, 46151.1245, 17179.522, 42856.838, 22331.5668, 48549.17835, 47896.79135, 42112.2356, 16297.846, 21978.6769, 38746.3551, 24873.3849, 42124.5153, 34838.873, 35491.64, 42760.5022, 47928.03, 48517.56315, 24393.6224, 41919.097, 13844.506, 36085.219, 18033.9679, 21659.9301

In [199]:
# 3 - Count average cost for both lists
smokers_average = average(smokers_lst)
non_smokers_average = average(non_smokers_lst)

print("The average insurance cost for smoking patients is " + str(smokers_average))
print("The average insurance cost for smoking patients is " + str(non_smokers_average))
if smokers_average > non_smokers_average:
    print("Smokers pay " + str(round(smokers_average / non_smokers_average)) + " times more than non-smokers.")
else:
    print("Non-smokers pay " + str(round(non_smokers_average / smokers_average)) + " times more than smokers.")

The average insurance cost for smoking patients is 32050.23183153285
The average insurance cost for smoking patients is 8434.268297856199
Smokers pay 4 times more than non-smokers.
