# U.S. Medical Insurance Costs

## Importing the modules

In [1]:
import pandas as pd
from numpy import average
import csv

## Preparing the DataSet

#### Creating a list of rows from the dataset

In [2]:
dataset = []

with open("insurance.csv") as dataset_csv:
    dataset_raw = dataset_csv.readlines()
for i in dataset_raw:
    dataset.append(i.split(","))

#### Creating a dictionary from .csv file

In [3]:
dataset_dict = []
with open("insurance.csv") as insurance_csv:
    set_of_values = csv.DictReader(insurance_csv)
    for i in set_of_values:
        dataset_dict.append(i)

#### Creating a DataFrame

In [4]:
df = pd.DataFrame(pd.read_csv("insurance.csv"))
print(df)

      age     sex     bmi  children smoker     region      charges
0      19  female  27.900         0    yes  southwest  16884.92400
1      18    male  33.770         1     no  southeast   1725.55230
2      28    male  33.000         3     no  southeast   4449.46200
3      33    male  22.705         0     no  northwest  21984.47061
4      32    male  28.880         0     no  northwest   3866.85520
...   ...     ...     ...       ...    ...        ...          ...
1333   50    male  30.970         3     no  northwest  10600.54830
1334   18  female  31.920         0     no  northeast   2205.98080
1335   18  female  36.850         0     no  southeast   1629.83350
1336   21  female  25.800         0     no  southwest   2007.94500
1337   61  female  29.070         0    yes  northwest  29141.36030

[1338 rows x 7 columns]


## Finding the average value of insurance charges.

In [5]:
charges = []
for i in dataset:
    value = str(i[6])[:-2]
    if value != "charge":
        charges.append(float(value))
print("The average insurance cost for provided dataset is: ", round(average(charges),2))

The average insurance cost for provided dataset is:  13244.38


## Finding the average age of patients.

In [6]:
ages = []
for i in dataset:
    value = i[0]
    if i[0] != "age":
        ages.append(float(i[0]))
print("The average insurance age for provided dataset is: ", round(average(ages)))


The average insurance age for provided dataset is:  39


## Finding the region with the most of patients

In [7]:
northwest = 0
southeast = 0
southwest = 0
northeast = 0

for i in dataset:
    if i[5] == "northwest":
        northwest += 1
    elif i[5] == "southeast":
        southeast += 1
    elif i[5] == "southwest":
        southwest += 1
    elif i[5] == "northeast":
        northeast += 1
    else:
        pass

regions = {
    "northwest":northwest,
    "southeast":southeast,
    "southwest":southwest,
    "northeast":northeast
}

print(regions)

{'northwest': 325, 'southeast': 364, 'southwest': 325, 'northeast': 324}


## Comparison of average insurance costs for smokers and non-smokers

#### Pure Python

In [8]:
costs_smokers = 0
costs_nonsmokers = 0
num_smokers = 0
num_nonsmokers = 0

for i in dataset_dict:
    if i["smoker"] == 'yes':
        num_smokers += 1
        costs_smokers += float(i["charges"])
    elif i["smoker"] == 'no':
        num_nonsmokers += 1
        costs_nonsmokers += float(i["charges"])
    else:
        pass

print("Average cost for smokers - ", round(costs_smokers / num_smokers))
print("Average cost for nonsmokers - ", round(costs_nonsmokers / num_nonsmokers))

Average cost for smokers -  32050
Average cost for nonsmokers -  8434


#### Using Pandas Aggregate function

In [9]:
agg_func_for_smoking = {
    'charges': ['mean']
}
df.groupby(['smoker']).agg(agg_func_for_smoking).round(2)

Unnamed: 0_level_0,charges
Unnamed: 0_level_1,mean
smoker,Unnamed: 1_level_2
no,8434.27
yes,32050.23


## Average BMI

In [10]:
print("Average BMI in dataset - ",round(df['bmi'].agg('mean'),2))

Average BMI in dataset -  30.66


## Average insurance cost

In [11]:
print("Average insurance cost in dataset - ",str(round(df['charges'].agg('mean'),1))+"$")

Average insurance cost in dataset -  13270.4$


## Age analysis

In [12]:
agg_func_for_age = {
    'age': ['min','max','mean']
}
print("Average age: \n",df['age'].agg(['mean']).round(),"\n")
print("Maximum age: \n",df['age'].agg(['max']),"\n")
print("Minimum age: \n",df['age'].agg(['min']),"\n")

Average age: 
 mean    39.0
Name: age, dtype: float64 

Maximum age: 
 max    64
Name: age, dtype: int64 

Minimum age: 
 min    18
Name: age, dtype: int64 



## Male and female comparison

In [13]:
agg_func_for_sex = {
    'bmi': ['mean'],
    'charges': ['mean'],
    'age': ['mean'],
    'sex': ['count']
}

df.groupby(['sex']).agg(agg_func_for_sex).round(2)

Unnamed: 0_level_0,bmi,charges,age,sex
Unnamed: 0_level_1,mean,mean,mean,count
sex,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
female,30.38,12569.58,39.5,662
male,30.94,13956.75,38.92,676
