# U.S. Medical Insurance Costs

## Goals:
- Find the average age of patients in the dataset.
- Analyze where the majority of the individuals are from.
- Compare the insurance costs of smokers vs. non-smokers.
- Figure out what the average age is for someone who has at least one child in this dataset.


In [118]:
# import csv library
import csv
import statistics

In [119]:
# one empty list per insurance.csv column; the generator yields a fresh
# list for every name, so the seven lists are independent objects
(
    ages,
    sexes,
    bmis,
    num_children,
    smoker_statuses,
    regions,
    insurance_charges,
) = ([] for _ in range(7))

In [120]:
# helper function to load one column of csv data
def load_list_data(lst, csv_file, column_name):
    """Append every value of one CSV column to an existing list.

    Args:
        lst: list that receives the values; it is extended in place.
        csv_file: path of the CSV file; its first row is used as the header.
        column_name: header name of the column to copy.

    Returns:
        None. The values are added to ``lst`` exactly as ``csv.DictReader``
        produces them (no numeric conversion is done here).

    Raises:
        KeyError: if the rows have no ``column_name`` field.
    """
    # newline='' hands line-ending handling to the csv module, so quoted
    # fields that contain line breaks are read back unchanged.
    with open(csv_file, newline='') as csv_info:
        lst.extend(row[column_name] for row in csv.DictReader(csv_info))
    

In [121]:
# fill each list with its matching column from insurance.csv
for column_values, column_name in (
    (ages, 'age'),
    (sexes, 'sex'),
    (bmis, 'bmi'),
    (num_children, 'children'),
    (smoker_statuses, 'smoker'),
    (regions, 'region'),
    (insurance_charges, 'charges'),
):
    load_list_data(column_values, 'insurance.csv', column_name)

In [122]:
# create a class that can be used to analyze patient info
class PatientsInfo:
    """Summary statistics over parallel, per-patient columns of insurance data.

    Every constructor argument is a sequence holding one value per patient;
    the methods pair columns up by position with ``zip``. Values are run
    through ``int()`` / ``float()`` wherever arithmetic is needed, so the raw
    strings read from a CSV file can be passed in unchanged.
    """

    def __init__(self, ages, sexes, bmis, num_children, smoker_statuses, regions, insurance_charges):
        """Store the per-patient columns without copying or converting them."""
        self.ages = ages
        self.sexes = sexes
        self.bmis = bmis
        self.num_children = num_children
        self.smoker_statuses = smoker_statuses
        self.regions = regions
        self.insurance_charges = insurance_charges

    # finds the average age in the dataset
    def average_age(self):
        """Return the mean of all ages as a float.

        Raises:
            ZeroDivisionError: if there are no ages.
        """
        # use the builtin sum() instead of a local accumulator that shadows it
        return sum(int(age) for age in self.ages) / len(self.ages)

    # finds the majority region in the dataset
    def majority_region(self):
        """Return the most common region, as chosen by ``statistics.mode``."""
        return statistics.mode(self.regions)

    # prints the average cost for insurance for smokers vs non smokers
    def smoker_vs_nonsmoker_costs(self):
        """Print the mean insurance charge for smokers and for non-smokers.

        A patient counts as a smoker only when the status is exactly
        ``'yes'``; every other status is counted as a non-smoker. Nothing is
        returned. ``statistics.mean`` raises ``StatisticsError`` if either
        group is empty, in which case nothing is printed.
        """
        smokers_charges = []
        non_smokers_charges = []
        for status, charge in zip(self.smoker_statuses, self.insurance_charges):
            group = smokers_charges if status == 'yes' else non_smokers_charges
            group.append(float(charge))

        # compute both averages before printing so a failure prints nothing
        avg_costs_smokers = statistics.mean(smokers_charges)
        avg_costs_non_smokers = statistics.mean(non_smokers_charges)

        print('The average cost for smokers is $ {:.2f}'.format(avg_costs_smokers))
        print('The average cost for non smokers is $ {:.2f}'.format(avg_costs_non_smokers))

    # finds the average age for patients with one child
    def average_age_one_child(self):
        """Return the mean age of patients whose child count is exactly ``'1'``.

        The comparison is against the string ``'1'``, so patients with two or
        more children are not included.

        Raises:
            statistics.StatisticsError: if no patient has exactly one child.
        """
        one_child_ages = [
            int(age)
            for age, children in zip(self.ages, self.num_children)
            if children == '1'
        ]
        return statistics.mean(one_child_ages)
        

In [123]:
# build a PatientsInfo instance from the column lists loaded above
patient_info = PatientsInfo(
    ages=ages,
    sexes=sexes,
    bmis=bmis,
    num_children=num_children,
    smoker_statuses=smoker_statuses,
    regions=regions,
    insurance_charges=insurance_charges,
)

In [124]:
# report the result of each PatientsInfo method, in the order of the goals;
# the statement order below fixes the order of the printed lines

# mean age, shown rounded to a whole number
avg_age = patient_info.average_age()
print('The average age in the dataset is {:.0f}'.format(avg_age))
# most common region
majority_region = patient_info.majority_region()
print('The majority region in the dataset is {}'.format(majority_region))
# this method prints its two result lines itself and returns nothing
patient_info.smoker_vs_nonsmoker_costs()
# mean age of patients whose child count is exactly '1'
avg_age_one_child_patient = patient_info.average_age_one_child()
print('The average age for patients with one child is {:.0f}'.format(avg_age_one_child_patient))

The average age in the dataset is 39
The majority region in the dataset is southeast
The average cost for smokers is $ 32050.23
The average cost for non smokers is $ 8434.27
The average age for patients with one child is 39
