# U.S. Medical Insurance Costs

# Loading data

In [1]:
import csv

# One list per CSV column; index i in every list describes the same person.
age = []
sex = []
bmi = []
children = []
smoker = []
region = []
charges = []

# newline="" is what the csv module documentation asks for when a file object
# is handed to a reader, so that newlines embedded in quoted fields are
# interpreted correctly.
with open("insurance.csv", newline="") as data:
    data_dict = csv.DictReader(data)
    for row in data_dict:
        # DictReader yields every field as a string: numeric columns are
        # converted here, categorical columns are kept as text.
        age.append(int(row["age"]))
        sex.append(row["sex"])
        bmi.append(float(row["bmi"]))
        children.append(int(row["children"]))
        smoker.append(row["smoker"])
        region.append(row["region"])
        charges.append(float(row["charges"]))

# Quick look at the first five values of every column.
print(age[0:5], sex[0:5], bmi[0:5], children[0:5])
print(smoker[0:5], region[0:5], charges[0:5])

[19, 18, 28, 33, 32] ['female', 'male', 'male', 'male', 'male'] [27.9, 33.77, 33.0, 22.705, 28.88] [0, 1, 3, 0, 0]
['yes', 'no', 'no', 'no', 'no'] ['southwest', 'southeast', 'southeast', 'northwest', 'northwest'] [16884.924, 1725.5523, 4449.462, 21984.47061, 3866.8552]


In [2]:
#numeric code for sex: 0 (female) or 1 (male); any other label becomes None
sex_codes = {'female': 0, 'male': 1}
sex_num = [sex_codes.get(person) for person in sex]

print(sex_num[0:5])

#numeric code for smoking status: 0 (not a smoker) or 1 (smoker); any other answer becomes None
smoker_codes = {'no': 0, 'yes': 1}
smoker_num = [smoker_codes.get(ans) for ans in smoker]

print(smoker_num[0:5])

[0, 1, 1, 1, 1]
[1, 0, 0, 0, 0]


In [3]:
#one dictionary per person, collected in a list; ids start at 1
datapoints = []
for i, person_age in enumerate(age):
    #sex and smoker are stored in their numeric encoding
    record = dict(
        id=i + 1,
        age=person_age,
        sex=sex_num[i],
        bmi=bmi[i],
        children=children[i],
        smoker=smoker_num[i],
        region=region[i],
        charges=charges[i],
    )
    datapoints.append(record)

print(datapoints[0:2])

[{'id': 1, 'age': 19, 'sex': 0, 'bmi': 27.9, 'children': 0, 'smoker': 1, 'region': 'southwest', 'charges': 16884.924}, {'id': 2, 'age': 18, 'sex': 1, 'bmi': 33.77, 'children': 1, 'smoker': 0, 'region': 'southeast', 'charges': 1725.5523}]


# Classes and helper functions
## Record Class
This class will be used to represent all info about a person (a record) in an object.

In [4]:
class Record:
    """Plain container holding every stored value for one person."""

    def __init__(self, age, sex, bmi, children, smoker, region, charges):
        # sex and smoker are expected in numeric format
        self.age, self.sex = age, sex
        self.bmi, self.children = bmi, children
        self.smoker, self.region = smoker, region
        self.charges = charges


## Population Class
The `Group` class represents a population (a group of people) as a single object.

In [5]:
class Group():
    """A named collection of records with summary statistics.

    Records are expected to expose the attributes ``age``, ``sex``, ``bmi``,
    ``children``, ``smoker``, ``region`` and ``charges``. ``sex`` and
    ``smoker`` are summed directly, so they must be numeric (1 counts the
    person as a man / a smoker, 0 does not).

    Attributes:
        name: label used in the textual representation of the group.
        group: list of the records added so far, in insertion order.
        size: number of records added through ``add_person``.

    Every statistic divides by ``size``; asking for one on a group without
    records raises ``ZeroDivisionError``.
    """

    def __init__(self, name):
        self.size = 0
        self.group = []
        self.name = name

    def __repr__(self):
        return f"group '{self.name}'"

    def add_person(self, record):
        """Add a record to the group and update the group size."""
        self.size += 1
        self.group.append(record)

    def _mean(self, values):
        """Return the sum of ``values`` divided by the group size.

        Raises:
            ZeroDivisionError: if the group has no records. The exception
                type is the one a plain division by zero would produce; the
                message additionally names the empty group.
        """
        if self.size == 0:
            raise ZeroDivisionError(
                f"cannot compute statistics for empty {self!r}")
        return sum(values) / self.size

    def average_age(self):
        """Return the unrounded mean age of the group."""
        return self._mean(record.age for record in self.group)

    def fraction_of_men(self, roundto = 3):
        """Return the share of men (sex == 1), rounded to ``roundto`` digits."""
        return round(self._mean(record.sex for record in self.group), roundto)

    def fraction_of_women(self, roundto = 3):
        """Return 1 minus the rounded share of men, rounded to ``roundto`` digits."""
        # Derived from the already rounded share of men so that the two
        # reported fractions add up to 1.
        result = 1 - self.fraction_of_men(roundto)
        return round(result, roundto)

    def average_bmi(self, roundto = 3):
        """Return the mean BMI of the group, rounded to ``roundto`` digits."""
        return round(self._mean(record.bmi for record in self.group), roundto)

    def average_children(self):
        """Return the unrounded mean number of children per person."""
        return self._mean(record.children for record in self.group)

    def fraction_of_smokers(self, roundto = 3):
        """Return the share of smokers (smoker == 1), rounded to ``roundto`` digits."""
        return round(self._mean(record.smoker for record in self.group), roundto)

    def fraction_of_nonsmokers(self, roundto = 3):
        """Return 1 minus the rounded share of smokers, rounded to ``roundto`` digits."""
        result = 1 - self.fraction_of_smokers(roundto)
        return round(result, roundto)

    def fraction_per_region(self, one_region, roundto = 3):
        """Return the share of records whose region equals ``one_region``."""
        # Summing a 1 for every matching record counts the matches.
        matches = (1 for record in self.group if record.region == one_region)
        return round(self._mean(matches), roundto)

    def average_charges(self, roundto = 4):
        """Return the mean charges of the group, rounded to ``roundto`` digits."""
        return round(self._mean(record.charges for record in self.group), roundto)

## Function compare_groups
This function compares a list of groups on the average (or percentage) of one variable (parameter).

It compares groups with regard to one parameter, which can be:
1. "age" - prints the average age of each group,
2. "sex" - prints the percentage of men,
3. "bmi" - prints the average BMI,
4. "children" - prints the average number of children per person,
5. "smoker" - prints the percentage of smokers,
6. a region name taken from the global variable `region` - prints the percentage of people living in that region,
7. "charges" - prints the average charges.

It returns a list of (group, value) pairs sorted by value in ascending order.

In [24]:
def compare_groups(group_list,parameter):
    
    global region
    
    result = []
    value = None

    for group in group_list:
        
        if parameter == "age":
            value = group.average_age()
            print(f"{group} has average age {value}.")
        
        elif parameter == "sex":
            value = group.fraction_of_men() * 100
            print(f"{group} has {value}% of men.")
        
        elif parameter == "bmi":
            value = group.average_bmi()
            print(f"{group} has average BMI of {value}.")
        
        elif parameter == "children":
            value = group.average_children()
            if float(value) == 1.0:
                ending = "child"
            else:
                ending = "children"
            print(f"The {group} has on average {value} {ending}.")
        
        elif parameter == "smoker":
            value = group.fraction_of_smokers() * 100
            print(f"{group} has {value}% of smokers.")
    
        elif parameter in set(region):
            one_region = parameter
            value = group.fraction_per_region(one_region) * 100
            print(f"The {group} has {value}% of people in {one_region} region.")
        
        elif parameter == "charges":
            value = group.average_charges()
            print(f"{group} has average charges of {value}.")
            
        result.append((group, value))
       
    result.sort(key=lambda y: y[1])
    return result

In [25]:
#smoke test: build two small groups and try the Group methods on them

#Record takes its arguments in exactly this order
record_fields = ("age", "sex", "bmi", "children", "smoker", "region", "charges")

group1 = Group("first ten")
group2 = Group("11-20")

#fill group1 from the first ten datapoints, then group2 from the next ten
for target, chunk in ((group1, datapoints[0:10]), (group2, datapoints[10:20])):
    for datapoint in chunk:
        record = Record(*(datapoint[field] for field in record_fields))
        target.add_person(record)

#all methods were tried this way and worked as expected
#example of a test: share of 'southeast' in group1, shown next to the raw regions
print(group1.fraction_per_region('southeast'))
region[0:10]

0.4


['southwest',
 'southeast',
 'southeast',
 'northwest',
 'northwest',
 'southeast',
 'southeast',
 'northwest',
 'northeast',
 'northwest']

In [28]:
#the comparison function showed expected behaviour when tried out, for example:

#each parameter is paired with the raw column used to check the result by eye
checks = [
    ("children", children),
    ("southeast", region),
    ("sex", sex),
    ("charges", charges),
]

for position, (parameter, raw_values) in enumerate(checks):
    #function prints a sentence per group and returns sorted list
    print(compare_groups([group1, group2], parameter))
    print(raw_values[0:10])
    if position < len(checks) - 1:
        #blank line between two consecutive comparisons
        print(raw_values[10:20], "\n")
    else:
        print(raw_values[10:20])

The group 'first ten' has on average 1.0 child.
The group '11-20' has on average 0.2 children.
[(group '11-20', 0.2), (group 'first ten', 1.0)]
[0, 1, 3, 0, 0, 0, 1, 3, 2, 0]
[0, 0, 0, 0, 0, 1, 1, 0, 0, 0] 

The group 'first ten' has 40.0% of people in southeast region.
The group '11-20' has 30.0% of people in southeast region.
[(group '11-20', 30.0), (group 'first ten', 40.0)]
['southwest', 'southeast', 'southeast', 'northwest', 'northwest', 'southeast', 'southeast', 'northwest', 'northeast', 'northwest']
['northeast', 'southeast', 'southwest', 'southeast', 'southeast', 'southwest', 'northeast', 'northeast', 'southwest', 'southwest'] 

group 'first ten' has 50.0% of men.
group '11-20' has 70.0% of men.
[(group 'first ten', 50.0), (group '11-20', 70.0)]
['female', 'male', 'male', 'male', 'male', 'female', 'female', 'female', 'male', 'female']
['male', 'female', 'male', 'female', 'male', 'male', 'female', 'male', 'male', 'male'] 

group 'first ten' has average charges of 10351.9529.
gro

# Exploring the data