# U.S. Medical Insurance Costs

In [None]:
#PROJECT OBJECTIVES:

# Analyze a dataset by building out functions or class methods
# Use libraries to assist in your analysis
# Optional: Document and organize your findings
# Optional: Make predictions about a dataset’s features based on your findings

In [9]:
import csv
import pandas as pd

with open('insurance.csv') as csv_file:
    insurance_data = csv.reader(csv_file)

In [11]:
df = pd.read_csv('insurance.csv')
print(df)

      age     sex     bmi  children smoker     region      charges
0      19  female  27.900         0    yes  southwest  16884.92400
1      18    male  33.770         1     no  southeast   1725.55230
2      28    male  33.000         3     no  southeast   4449.46200
3      33    male  22.705         0     no  northwest  21984.47061
4      32    male  28.880         0     no  northwest   3866.85520
...   ...     ...     ...       ...    ...        ...          ...
1333   50    male  30.970         3     no  northwest  10600.54830
1334   18  female  31.920         0     no  northeast   2205.98080
1335   18  female  36.850         0     no  southeast   1629.83350
1336   21  female  25.800         0     no  southwest   2007.94500
1337   61  female  29.070         0    yes  northwest  29141.36030

[1338 rows x 7 columns]


Transfer data from CSV into a List:

In [16]:
insurance_data = []

with open('insurance.csv') as csv_file:
    insurance_reader = csv.DictReader(csv_file, delimiter = ",")
    for i in insurance_reader:
        insurance_data.append(i)

unique_regions = []
for person in insurance_data:
    if person["region"] not in unique_regions:
        unique_regions.append(person["region"])

print(unique_regions)

['southwest', 'southeast', 'northwest', 'northeast']


Male / Female Patient Breakdown

In [42]:
def sex_count(insurance_data):

    male_patients = 0
    female_patients = 0

    for person in insurance_data:
        if person['sex'] == 'female':
            female_patients += 1
        if person['sex'] == 'male':
            male_patients += 1
   
    total_patients = male_patients + female_patients
    pct_male = round(male_patients / total_patients * 100, 0)
    pct_female = round(female_patients / total_patients * 100, 0)

    print('There are ' + str(total_patients) + ' total patients in the dataset.')
    print('The total number of male patients is ' + str(male_patients) + ' (' + str(pct_male) + '% of total.')
    print('The total number of female patients is ' + str(female_patients) + ' (' + str(pct_female) + '% of total.')

print(sex_count(insurance_data))

There are 1338 total patients in the dataset.
The total number of male patients is 676 (51.0% of total.
The total number of female patients is 662 (49.0% of total.
None


Average Age of the Patients

In [47]:
def average_age(insurance_data):

    total_age = 0
    total_patients = 0

    for person in insurance_data:
        total_age += int(person['age'])
        total_patients += 1

    average_age = round(total_age / total_patients, 1)

    print('The average age of the patients surveyed is ' + str(average_age) + '.')

print(average_age(insurance_data))
    

The average age of the patients surveyed is 39.2.
None


Regional Breakdown of Patients

In [57]:
def regional_breakdown(insurance_data):
   
    southwest = 0
    southeast = 0
    northwest = 0
    northeast = 0
        
    for person in insurance_data:
        if person['region'] == 'southwest':
            southwest += 1
        if person['region'] == 'southeast':
            southeast += 1
        if person['region'] == 'northwest':
            northwest += 1
        if person['region'] == 'northeast':
            northeast += 1

    total = southwest + southeast + northwest + northeast
    if total > 0:
        print('Southwest: ' + str(southwest) + ' (' + str(round((southwest / total) * 100)) + '% of total)')
        print('Southeast: ' + str(southeast) + ' (' + str(round((southeast / total) * 100)) + '% of total)')
        print('Northwest: ' + str(northwest) + ' (' + str(round((northwest / total) * 100)) + '% of total)')
        print('Northeast: ' + str(northeast) + ' (' + str(round((northeast / total) * 100)) + '% of total)')

print(regional_breakdown(insurance_data))

Southwest: 325 (24% of total)
Southeast: 364 (27% of total)
Northwest: 325 (24% of total)
Northeast: 324 (24% of total)
None


Costs Between Smokers & Non-Smokers

In [66]:
def smoker_insurance_cost(insurance_data):

    total_smoker_charges = 0 
    total_smokers = 0
    total_nonsmoker_charges = 0
    total_nonsmokers = 0

    for person in insurance_data:
        if person['smoker'] == 'yes':
            total_smoker_charges += float(person['charges'])
            total_smokers += 1
        else:
            total_nonsmoker_charges += float(person['charges'])
            total_nonsmokers += 1

    average_smoker_charge = round((total_smoker_charges / total_smokers), 0)
    average_nonsmoker_charge = round((total_nonsmoker_charges / total_nonsmokers), 0)

    print('Avg. Smoker Insurance Cost: $' + str(average_smoker_charge))
    print('Avg. Nonsmoker Insurance Cost: $' + str(average_nonsmoker_charge))

print(smoker_insurance_cost(insurance_data))
    

Avg. Smoker Insurance Cost: $32050.0
Avg. Nonsmoker Insurance Cost: $8434.0
None


In [85]:
def smoker_by_sex(insurance_data):

    total_male_smokers = 0
    male_smoker_insurance = 0
    total_female_smokers = 0
    female_smoker_insurance = 0

    for person in insurance_data:
        if person['sex'] == 'male' and person['smoker'] == 'yes':
            total_male_smokers += 1
            male_smoker_insurance += float(person['charges'])
        if person['sex'] == 'female' and person['smoker'] == 'yes':
            total_female_smokers += 1
            female_smoker_insurance += float(person['charges'])

    avg_male_smoker_ins = round(male_smoker_insurance / total_male_smokers, 0)
    avg_female_smoker_ins = round(female_smoker_insurance / total_female_smokers, 0)

    print('There are ' + str(total_male_smokers) + ' male smokers in the dataset, with an average insurnace cost of $' + str(avg_male_smoker_ins) + '.')
    print('There are ' + str(total_female_smokers) + ' female smokers in the dataset, with an average insurnace cost of $' + str(avg_female_smoker_ins) + '.')

smoker_by_sex(insurance_data)
    

There are 159 male smokers in the dataset, with an average insurnace cost of $33042.0.
There are 115 female smokers in the dataset, with an average insurnace cost of $30679.0.


In [87]:
# where are smokers most prevalent?

def smoker_by_region(insurance_data):

    sw_patients = 0
    se_patients = 0
    nw_patients = 0
    ne_patients = 0

    for person in insurance_data:
        if person['region'] == 'southwest':
            sw_patients += 1
        if person['region'] == 'southeast':
            se_patients += 1
        if person['region'] == 'northwest':
            nw_patients += 1
        if person['region'] == 'northeast':
            ne_patients += 1
        
    sw_smokers = 0
    se_smokers = 0
    nw_smokers = 0
    ne_smokers = 0

    for person in insurance_data:
        if person['smoker'] == 'yes' and person['region'] == 'southwest':
            sw_smokers += 1
        if person['smoker'] == 'yes' and person['region'] == 'southeast':
            se_smokers += 1
        if person['smoker'] == 'yes' and person['region'] == 'northwest':
            nw_smokers += 1
        if person['smoker'] == 'yes' and person['region'] == 'northeast':
            ne_smokers += 1

    sw_smoker_pct = round(sw_patients / sw_smokers, 0)
    se_smoker_pct = round(se_patients / se_smokers, 0)
    nw_smoker_pct = round(nw_patients / nw_smokers, 0)
    ne_smoker_pct = round(ne_patients / ne_smokers, 0)

    print('There are ' + str(sw_patients) + ' patients in the SW Region; ' + str(sw_smoker_pct) + '% are smokers.')
    print('There are ' + str(se_patients) + ' patients in the SE Region; ' + str(se_smoker_pct) + '% are smokers.')
    print('There are ' + str(nw_patients) + ' patients in the NW Region; ' + str(nw_smoker_pct) + '% are smokers.')
    print('There are ' + str(ne_patients) + ' patients in the NE Region; ' + str(ne_smoker_pct) + '% are smokers.')
    print(str(sw_smokers + se_smokers + nw_smokers + ne_smokers) + ' patients are smokers.')
smoker_by_region(insurance_data)


There are 325 patients in the SW Region; 6.0% are smokers.
There are 364 patients in the SE Region; 4.0% are smokers.
There are 325 patients in the NW Region; 6.0% are smokers.
There are 324 patients in the NE Region; 5.0% are smokers.
274 patients are smokers.


Figure out the Average Age for somone with one child in the dataset:

In [73]:
def avg_parent_age(insurance_data):

    total_parent_age = 0
    parent_count = 0

    for person in insurance_data:
        if int(person['children']) > 0:
            total_parent_age += int(person['age'])
            parent_count += 1
        
    if parent_count > 0:
        avg_parent_age = round(total_parent_age / parent_count, 0)
        return avg_parent_age
    else:
        return "No parent data available"
        
print('Avg. parent age: ' + str(avg_parent_age(insurance_data)))
    

Avg. parent age: 40.0


In [77]:
def children_histogram(insurance_data):

    children0 = 0
    children1 = 0
    children2 = 0
    children3 = 0
    children4 = 0

    for person in insurance_data:
        if int(person['children']) == 0:
            children0 += 0
        if int(person['children']) == 1:
            children1 += 1
        if int(person['children']) == 2:
            children2 += 1
        if int(person['children']) == 3:
            children3 += 1
        if int(person['children']) >= 4:
            children4 += 1
 

    return children0, children1, children2, children3, children4

print(children_histogram(insurance_data))
    

    

(0, 324, 240, 157, 43)
