# U.S. Medical Insurance Costs

In [48]:
import csv

### Data Preprocessing

In [75]:
# Column holders: one list per CSV attribute, filled in file order, so that
# position i in every list describes the same row of the dataset.
# Values are stored exactly as csv.DictReader yields them (strings).
age_values = []
sex_values = []
bmi_values = []
num_of_children = []
smoker_values = []
regions = []
charges = []

# Open the insurance.csv dataset and retrieve the data.
# Data is placed into the holder lists above, one per attribute,
# to be combined later into a dictionary object format.
# newline='' hands line-ending handling to the csv module, which is what
# the csv documentation asks for when a file object is passed to a reader.
with open('insurance.csv', newline='') as insurance_file:

    # Dataset headers are: age,sex,bmi,children,smoker,region,charges
    reader = csv.DictReader(insurance_file)

    for row in reader:
        age_values.append(row["age"])
        sex_values.append(row["sex"])
        bmi_values.append(row["bmi"])
        num_of_children.append(row["children"])
        smoker_values.append(row["smoker"])
        regions.append(row["region"])
        charges.append(row["charges"])


# Print the number of rows in dataset
print(len(age_values))

1338


### Create Dictionary Object

In [93]:
# Create a dictionary to serve as the skeleton for a JSON object.
# Keys are 0-based row numbers; each value is a dict of that row's attributes.
# The number of records is taken from the column lists themselves instead of
# a hard-coded row count, so this cell stays correct if the dataset changes.
field_names = ("age", "sex", "bmi", "children", "smoker", "regions", "charges")
column_lists = (age_values, sex_values, bmi_values, num_of_children,
                smoker_values, regions, charges)

# zip(*column_lists) walks the columns in lockstep, yielding one row at a time;
# pairing each row with field_names rebuilds the per-record dictionary.
# Note: the "regions" key is kept as-is even though the CSV header is "region".
insurance_data = {
    row_number: dict(zip(field_names, row_values))
    for row_number, row_values in enumerate(zip(*column_lists))
}

#print(insurance_data)

### Calculate Average Age

In [129]:
# Calculate the average age in the dataset.
# Ages were read from the CSV as strings, so each is converted to int
# before summing; the total is then divided by the number of rows.
average_age = sum(map(int, age_values)) / len(age_values)
print("Average Age = {:.2f}".format(average_age))

Average Age = 39.21


### Calculate Average BMI Across All Data

In [141]:
# Calculate the average BMI across every row in the dataset.
# BMI values were read from the CSV as strings, so each is converted to
# float before summing; the total is then divided by the number of rows.
average_bmi = sum(map(float, bmi_values)) / len(bmi_values)
print("Average BMI = {:.2f}".format(average_bmi))

Average BMI = 30.66


### Smoking Statistics

In [203]:
# Tally the rows by smoking status ("yes" / "no") and report both counts.
num_of_smokers = smoker_values.count("yes")
num_of_non_smokers = smoker_values.count("no")

smoker_counts_report = "Number of smokers = {}\nNumber of non-smokers = {}".format(
    num_of_smokers, num_of_non_smokers)
print(smoker_counts_report)

# Ratio of non-smokers to smokers, reported to two decimal places.
non_smoker_to_smoker_ratio = num_of_non_smokers / num_of_smokers
print("There are {:.2f} times as many non-smokers as there are smokers.".format(non_smoker_to_smoker_ratio))

def calculate_total_bmi(smoker_status,insurance_data):
    """Sum the BMI of every record whose "smoker" entry equals smoker_status.

    Parameters:
        smoker_status: value compared against each record's "smoker" entry.
        insurance_data: mapping whose values are record dicts; only the
            "smoker" and "bmi" entries of each record are read, and the
            "bmi" entry is converted with float().

    Returns:
        The running float total of the matching BMI values; 0.0 when no
        record matches.
    """
    total_bmi = 0.0
    for record in insurance_data.values():
        # Skip records belonging to the other group.
        if record.get("smoker") != smoker_status:
            continue
        total_bmi += float(record.get("bmi"))
    return total_bmi

# Mean BMI within each group: the group's BMI total divided by its size.
total_smoker_bmi = calculate_total_bmi("yes", insurance_data)
average_smoker_bmi = total_smoker_bmi / num_of_smokers

total_non_smoker_bmi = calculate_total_bmi("no", insurance_data)
average_non_smoker_bmi = total_non_smoker_bmi / num_of_non_smokers

print("\nAverage smoker bmi = {:.2f}".format(average_smoker_bmi))
print("Average non-smoker bmi = {:.2f}".format(average_non_smoker_bmi))

# Ratio of the smoker mean to the non-smoker mean, to three decimal places.
smoker_to_non_smoker_bmi_ratio = average_smoker_bmi / average_non_smoker_bmi
print("On average a smoker's BMI is {:.3f} times higher than a non-smoker.".format(smoker_to_non_smoker_bmi_ratio))

Number of smokers = 274
Number of non-smokers = 1064
There are 3.88 times as many non-smokers as there are smokers.

Average smoker bmi = 30.71
Average non-smoker bmi = 30.65
On average a smoker's BMI is 1.002 times higher than a non-smoker.


In [147]:
# Evaluate insurance_data as the cell's last expression so the notebook
# renders the mapping of row number -> record dictionary.
# NOTE(review): this displays every record in the mapping; consider
# previewing only a few entries to keep the saved output small.
insurance_data

{0: {'age': '19',
  'sex': 'female',
  'bmi': '27.9',
  'children': '0',
  'smoker': 'yes',
  'regions': 'southwest',
  'charges': '16884.924'},
 1: {'age': '18',
  'sex': 'male',
  'bmi': '33.77',
  'children': '1',
  'smoker': 'no',
  'regions': 'southeast',
  'charges': '1725.5523'},
 2: {'age': '28',
  'sex': 'male',
  'bmi': '33',
  'children': '3',
  'smoker': 'no',
  'regions': 'southeast',
  'charges': '4449.462'},
 3: {'age': '33',
  'sex': 'male',
  'bmi': '22.705',
  'children': '0',
  'smoker': 'no',
  'regions': 'northwest',
  'charges': '21984.47061'},
 4: {'age': '32',
  'sex': 'male',
  'bmi': '28.88',
  'children': '0',
  'smoker': 'no',
  'regions': 'northwest',
  'charges': '3866.8552'},
 5: {'age': '31',
  'sex': 'female',
  'bmi': '25.74',
  'children': '0',
  'smoker': 'no',
  'regions': 'southeast',
  'charges': '3756.6216'},
 6: {'age': '46',
  'sex': 'female',
  'bmi': '33.44',
  'children': '1',
  'smoker': 'no',
  'regions': 'southeast',
  'charges': '8240.58