In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib
import os
import folium
import json

In [2]:
# This magic makes matplotlib figures render inline in the notebook output

%matplotlib inline

In [3]:
# Project root folder; the prepared-data CSV path is built from it.
# NOTE(review): machine-specific absolute path - adjust when running elsewhere.
path = r'C:\Users\aaron\OneDrive\Desktop\Health Insurance Analysis'

In [4]:
# Echo the project root to confirm it is set as expected
path

'C:\\Users\\aaron\\OneDrive\\Desktop\\Health Insurance Analysis'

In [5]:
# Load the prepared insurance data into a DataFrame.
# index_col=False tells pandas not to use the first column as the index.
insurance_csv = os.path.join(path, '02 Data', 'Prepared data', 'insurance_checked.csv')
df = pd.read_csv(insurance_csv, index_col = False)

# changing the region column to names that match the Geojson

In [7]:
# Rename the dataset's region labels to the region names used in the GeoJSON
# ('feature.properties.name'), so the choropleth can join the two on that key.
# A single exact-value mapping replaces four separate substring replacements:
# it is idempotent on re-run and cannot accidentally rewrite partial matches.
# NOTE(review): 'northwest' -> 'Midwest' and 'southwest' -> 'West' are
# approximations chosen to fit the GeoJSON's regions - confirm they are intended.
region_names = {
    'northwest': 'Midwest',
    'southwest': 'West',
    'southeast': 'South',
    'northeast': 'Northeast',
}
df['region'] = df['region'].replace(region_names)

In [11]:
# Path to the US-regions GeoJSON, built from the project root the same way the
# CSV path is, so only `path` needs changing when the project folder moves.
country_geo = os.path.join(path, '02 Data', 'Original data', 'geoJSON_us_regions.json')

In [12]:
# Optional: look at the GeoJSON contents here, e.g. to find the property
# that holds each region's name (needed for the choropleth's key_on).

# Open through a context manager so the file handle is closed after reading;
# JSON is read as UTF-8 rather than the platform's default encoding.
with open(country_geo, encoding = 'utf-8') as f:
    # json.load returns the parsed JSON document as a dictionary
    data = json.load(f)

# Print every feature (its properties and geometry)
for feature in data['features']:
    print(feature)

{'type': 'Feature', 'properties': {'name': 'Midwest'}, 'geometry': {'type': 'Polygon', 'coordinates': [[[-104.32617187499999, 49.03786794532644], [-103.974609375, 41.04621681452063], [-101.6015625, 41.04621681452063], [-102.48046875, 37.020098201368114], [-94.5703125, 37.09023980307208], [-93.603515625, 36.4566360115962], [-89.296875, 37.23032838760387], [-87.71484375, 38.272688535980976], [-85.4296875, 38.54816542304656], [-84.638671875, 39.16414104768742], [-83.232421875, 38.75408327579141], [-82.177734375, 38.685509760012], [-80.85937499999999, 39.842286020743394], [-80.595703125, 41.57436130598913], [-83.583984375, 41.96765920367816], [-82.79296874999999, 42.4234565179383], [-82.705078125, 43.389081939117496], [-83.232421875, 43.70759350405294], [-84.111328125, 43.96119063892024], [-83.49609375, 44.653024159812], [-84.55078125, 45.460130637921004], [-85.4296875, 45.1510532655634], [-86.396484375, 44.213709909702054], [-86.220703125, 43.389081939117496], [-86.220703125, 42.163403424

In [13]:
# Check the dtype of each column in the loaded frame
df.dtypes

age           int64
sex          object
bmi         float64
children      int64
smoker       object
region       object
charges     float64
dtype: object

In [39]:
# Mean bmi per region: one row per region with columns 'region' and 'bmi',
# which is the shape the choropleth expects.
data_to_plot = df.groupby('region', as_index = False)['bmi'].mean()

In [41]:
# Preview the per-region bmi averages
data_to_plot.head()

Unnamed: 0,region,bmi
0,Midwest,29.200615
1,Northeast,29.173765
2,South,33.354945
3,West,30.596615


# visualization of the average bmi in each region

In [43]:
# Set up a folium map centred on the contiguous United States, where the region
# polygons are. Latitude must lie within [-90, 90], and an integer zoom level of 4
# shows the whole country. The map gets its own name so it does not shadow the
# builtin `map`.
bmi_map = folium.Map(location = [39.8, -98.6], zoom_start = 4)

# Choropleth maps bind pandas DataFrames to GeoJSON geometries: each region's
# polygon is shaded according to its value in the data frame.
folium.Choropleth(
    geo_data = country_geo,
    data = data_to_plot,
    columns = ['region', 'bmi'],  # [key column, value column]
    key_on = 'feature.properties.name',  # must point at the GeoJSON property holding the region name
    fill_color = 'YlOrBr', fill_opacity = 0.6, line_opacity = 0.1,
    legend_name = "bmi").add_to(bmi_map)
folium.LayerControl().add_to(bmi_map)

# Display the map as the cell output
bmi_map

# Observations

* Individuals in the Northeast and Midwest regions have the lowest average BMI, in the 29.2-29.9 range.
* The South has the highest average BMI, in the 32.7-33.4 range, which falls in the obese category.
* No region has an average BMI in the 'normal' range of 18.5-25.

In [47]:
# Mean charges per region: one row per region with columns 'region' and 'charges',
# which is the shape the choropleth expects.
data_to_plot2 = df.groupby('region', as_index = False)['charges'].mean()

In [49]:
# Preview the per-region average charges
data_to_plot2.head()

Unnamed: 0,region,charges
0,Midwest,12417.575169
1,Northeast,13406.384599
2,South,14735.411484
3,West,12346.937508


# visualization of the average insurance charges by region

In [53]:
# Set up a folium map centred on the contiguous United States, where the region
# polygons are. Latitude must lie within [-90, 90], and an integer zoom level of 4
# shows the whole country. The map gets its own name so it does not shadow the
# builtin `map`.
charges_map = folium.Map(location = [39.8, -98.6], zoom_start = 4)

# Choropleth maps bind pandas DataFrames to GeoJSON geometries: each region's
# polygon is shaded according to its value in the data frame.
folium.Choropleth(
    geo_data = country_geo,
    data = data_to_plot2,
    columns = ['region', 'charges'],  # [key column, value column]
    key_on = 'feature.properties.name',  # must point at the GeoJSON property holding the region name
    fill_color = 'YlOrBr', fill_opacity = 0.6, line_opacity = 0.1,
    legend_name = "charges").add_to(charges_map)
folium.LayerControl().add_to(charges_map)

# Display the map as the cell output
charges_map

# Observations

* The South has a higher average insurance charge than the other three regions; one contributing factor could be that it also has the highest average BMI, which is in the obese range.
* Average insurance charges are lowest in the West and Midwest.

In [57]:
# Save the frame (with the renamed region column) as CSV, leaving out the index.
# NOTE(review): the bare filename resolves against the kernel's working directory,
# not the project `path` - confirm that is the intended output location.
export_name = 'Updated_region.csv'
df.to_csv(export_name, index = False)