# U.S. Medical Insurance Costs

In [76]:
import pandas as pd
# Load the insurance dataset from a CSV file resolved relative to the current
# working directory. The analysis cells below read its `smoker`, `bmi`,
# `charges`, `sex` and `age` columns.
df = pd.read_csv('insurance.csv')

# Hypotheses

1. Smokers, being more sedentary, have a higher BMI than non-smokers.
2. The obese pay more for their health care than the non-obese.
3. The 100 highest paying people are obese, smokers, or both. The 100 who pay the least neither smoke nor are obese.
4. Women are more prone to obesity than men.
5. In adulthood, people begin to lose muscle and gain body mass, so obesity increases with age.


# Are there differences in the BMI of smokers and non-smokers?

In [77]:
# Split the dataset by smoking status.
is_smoker = df["smoker"] == "yes"
is_non_smoker = df["smoker"] == "no"
smokers = df.loc[is_smoker]
non_smokers = df.loc[is_non_smoker]
total_n = df["smoker"].count()

# Average BMI of each group: sum of the BMI values over their non-null count.
smokers_bmi = smokers["bmi"]
non_smokers_bmi = non_smokers["bmi"]
bmi_of_smokers = smokers_bmi.sum() / smokers_bmi.count()
bmi_of_non_smokers = non_smokers_bmi.sum() / non_smokers_bmi.count()

print(f"Smokers BMI promedy: {bmi_of_smokers}")
print(f"Non smokers BMI promedy: {bmi_of_non_smokers}")
print("As we can see, there is no a big diference in BMI between smokers and non-smokers")

Smokers BMI promedy: 30.70844890510949
Non smokers BMI promedy: 30.651795112781954
As we can see, there is no a big diference in BMI between smokers and non-smokers


# How much more do the obese pay on average compared to the non-obese?


In [78]:
# A BMI of 30 or higher is classified as obese.
obese_people = df[df.bmi >= 30]
non_obese_people = df[df.bmi < 30]

# Average charges per group. The denominator counts the non-null `charges`
# values, i.e. exactly the values that `sum()` adds up. Dividing by the BMI
# count instead would understate the average if any row had a missing charge.
charges_promedy_obeses = obese_people.charges.sum() / obese_people.charges.count()
charges_promedy_non_obeses = non_obese_people.charges.sum() / non_obese_people.charges.count()

# How much more, in percent, the obese group pays relative to the non-obese group.
difference_between_obeses_and_non_obeses_charges = charges_promedy_obeses / charges_promedy_non_obeses * 100 - 100

print(f"Obeses promedy charges: {charges_promedy_obeses}")
print(f"Non-Obeses promedy charges: {charges_promedy_non_obeses}")
print(f"As we can see, obeses pay in promedy {difference_between_obeses_and_non_obeses_charges} percent more")

Obeses promedy charges: 15552.335468868458
Non-Obeses promedy charges: 10713.666900584787
As we can see, obeses pay in promedy 45.163515098827276 percent more


# Are the 100 highest paying people obese, smokers, or both?

In [79]:
# --- The 100 people with the highest charges -------------------------------
df_sort_by_charges = df.sort_values(by=['charges'], ascending=False)
top_hundred_charges = df_sort_by_charges[0:100]
# `.count()` on a DataFrame returns the non-null count of every column; the
# `.smoker` / `.bmi` entry is then used as the number of matching rows.
num_of_smokers = top_hundred_charges[top_hundred_charges.smoker == 'yes'].count()
num_of_obese = top_hundred_charges[top_hundred_charges.bmi >= 30].count()
# Divide by the real group size so the percentage stays correct even if the
# dataset has fewer than 100 rows.
top_group_size = len(top_hundred_charges)
print(f"The {(num_of_smokers.smoker / top_group_size) * 100}% of top hundred charges are smokers")
print(f"The {(num_of_obese.bmi / top_group_size) * 100}% of top hundred charges are obese")

# --- The 100 people with the lowest charges --------------------------------
# Take the first 100 rows of the ascending sort. Slicing the descending frame
# with [-101:-1] would exclude its last row, i.e. the single lowest payer.
df_sort_by_charges_asc = df.sort_values(by=['charges'])
low_hundred_charges = df_sort_by_charges_asc[0:100]
num_of_smokers2 = low_hundred_charges[low_hundred_charges.smoker == 'yes'].count()
num_of_obese2 = low_hundred_charges[low_hundred_charges.bmi >= 30].count()
low_group_size = len(low_hundred_charges)
print(f"The {(num_of_smokers2.smoker / low_group_size) * 100}% of low hundred charges are smokers")
print(f"The {(num_of_obese2.bmi / low_group_size) * 100}% of low hundred charges are obese")

The 100.0% of top hundred charges are smokers
The 100.0% of top hundred charges are obese
The 0.0% of low hundred charges are smokers
The 51.0% of low hundred charges are obese


# Is there a greater tendency to obesity among women or among men?


In [80]:
# Split the dataset by sex, then keep the obese (BMI >= 30) members of each group.
total_female = df.loc[df["sex"] == "female"]
total_male = df.loc[df["sex"] == "male"]
female_obese = total_female.loc[total_female["bmi"] >= 30]
male_obese = total_male.loc[total_male["bmi"] >= 30]

# Share of each sex that is obese, in percent. Group sizes are taken from the
# number of non-null `age` values.
female_total_count = total_female["age"].count()
male_total_count = total_male["age"].count()
female_obese_percent = (female_obese["age"].count() / female_total_count) * 100
male_obese_percent = (male_obese["age"].count() / male_total_count) * 100

print(f"% of female obese: {female_obese_percent}")
print(f"% of male obese: {male_obese_percent}")
print("There are more obeses between men than woman")

% of female obese: 50.453172205438065
% of male obese: 55.17751479289941
There are more obeses between men than woman


# Between which age groups does BMI increase the most?

In [75]:
# Inclusive (min_age, max_age) bounds of the age groups being compared.
AGE_BANDS = [(18, 25), (26, 33), (34, 41), (42, 49), (50, 57), (58, 65)]


def mean_bmi_for_ages(frame, min_age, max_age):
    """Return the average BMI of the rows whose age is in [min_age, max_age].

    The average is the sum of the BMI values divided by their non-null count.
    """
    band_bmi = frame.loc[frame["age"].between(min_age, max_age), "bmi"]
    return band_bmi.sum() / band_bmi.count()


# Average BMI of every age group, in the same order as AGE_BANDS.
bmi_promedies = [mean_bmi_for_ages(df, min_age, max_age) for min_age, max_age in AGE_BANDS]

# Change in average BMI from each group to the next one. Pairing neighbours
# with zip avoids stopping the loop on a float equality test, which would end
# early if an earlier group happened to have the same mean as the last one.
differences_in_promedies = [
    later - earlier for earlier, later in zip(bmi_promedies, bmi_promedies[1:])
]

print(differences_in_promedies)

# The same change expressed as a percentage of the younger group's average.
band_pairs = list(zip(AGE_BANDS, AGE_BANDS[1:]))
percent_increases = [
    (difference / base) * 100
    for difference, base in zip(differences_in_promedies, bmi_promedies)
]

for ((from_lo, from_hi), (to_lo, to_hi)), percent in zip(band_pairs, percent_increases):
    print(f"Between the {from_lo} to {from_hi} age group and the {to_lo} to {to_hi} "
          f"age group there is a percentage increase in BMI of {percent}")

# Report the transition with the largest percentage increase, derived from the
# data rather than hard-coded into the message.
steepest = max(range(len(percent_increases)), key=lambda k: percent_increases[k])
(from_lo, from_hi), (to_lo, to_hi) = band_pairs[steepest]
print(f"As we can see, when adults go from the age group {from_lo} to {from_hi} "
      f"to the age group {to_lo} to {to_hi}, they experience the greatest growth in body mass.")

[0.0793982108972564, 0.2736147015848083, 0.3220785907859067, 0.7972388134741095, 0.42345698598388637]
Between the 18 to 25 age group and the 26 to 33 age group there is a percentage increase in BMI of 0.2645958510665737
Between the 26 to 33 age group and the 34 to 41 age group there is a percentage increase in BMI of 0.9094192330764479
Between the 34 to 41 age group and the 42 to 49 age group there is a percentage increase in BMI of 1.0608521397218111
Between the 42 to 49 age group and the 50 to 57 age group there is a percentage increase in BMI of 2.59835494827552
Between the 50 to 57 age group and the 58 to 65 age group there is a percentage increase in BMI of 1.3451754978423043
As we can see, when adults go from the age group 42 to 49 to the age group 50 to 47, they experience the greatest growth in body mass.


# Conclusion

1. Smokers, being more sedentary, have a higher BMI than non-smokers. FALSE.
2. The obese pay more for their health care than the non-obese. TRUE.
3. The 100 highest paying people are obese, smokers, or both: TRUE. The 100 who pay the least neither smoke nor are obese: FALSE (none of them smoke, but about half are obese).
4. Women are more prone to obesity than men. FALSE.
5. In adulthood, you begin to lose muscle and increase body mass, thus increasing obesity. TRUE, but the effect is small: average BMI rises by less than 3% between consecutive age groups.