# Medical Expense Dataset - Interaction Effects
Our data exploration suggested interaction effects in our data, which might explain its observed structure. Let's have a quick look at whether we were correct in assuming this

### Import

In [20]:
import os
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
import statsmodels.api as sm
from PIL import Image
from mpl_toolkits import mplot3d
from statsmodels.formula.api import ols

### Fonts
Define fonts to use plot fonts that are similar to the classic LaTEX fonts used for the equations

In [21]:
csfont = {'fontname':'Georgia'}
hfont = {'fontname':'Helvetica'}

### Load Colormap

In [22]:
im = Image.open("../../assets/colormaps/watercolours.png")
im = np.array(im)
colormap = im[:,0,:] / 255.0

### Load Data
This data is a subset of a dataset hosted on [Kaggle](https://www.kaggle.com/datasets/mirichoi0218/insurance)

In [34]:
data = pd.read_csv('../../data/healthcare_cost/insurance.csv')
columns = data.columns

### First Model without interaction effects modeled

In [39]:
model = ols('charges ~ C(age) + C(sex) + C(bmi) + C(children) + C(smoker) + C(region)', data=data).fit()
stats = sm.stats.anova_lm(model, typ=2)
print(stats['PR(>F)']['C(age)'] < 0.001)
print(stats['PR(>F)']['C(sex)'] < 0.001)
print(stats['PR(>F)']['C(bmi)'] < 0.001)
print(stats['PR(>F)']['C(children)'] < 0.001)
print(stats['PR(>F)']['C(smoker)'] < 0.001)
print(stats['PR(>F)']['C(region)'] < 0.001)

True
False
True
True
True
False


### Second Model without interaction effects modeled

In [53]:
model = ols('charges ~ C(age) + C(sex) + C(bmi) + C(children) + C(smoker) + '\
            'C(sex):C(age) + '\
            'C(sex):C(bmi) + '\
            'C(sex):C(children) + '\
            'C(sex):C(smoker)',data=data).fit()

stats = sm.stats.anova_lm(model, typ=2)
print(stats['PR(>F)']['C(age)'] < 0.001)
print(stats['PR(>F)']['C(sex)'] < 0.001)
print(stats['PR(>F)']['C(bmi)'] < 0.001)
print(stats['PR(>F)']['C(children)'] < 0.001)
print(stats['PR(>F)']['C(smoker)'] < 0.001)

# print(stats['PR(>F)']['C(sex):C(age)'] < 0.001)
# print(stats['PR(>F)']['C(sex):C(bmi)'] < 0.001)
# print(stats['PR(>F)']['C(sex):C(children)'] < 0.001)
# print(stats['PR(>F)']['C(sex):C(smoker)'] < 0.001)
# print(stats['PR(>F)']['C(sex):C(region)'] < 0.001)

# print(stats['PR(>F)']['C(sex):C(age)'] < 0.001)
# print(stats['PR(>F)']['C(sex):C(bmi)'] < 0.001)
# print(stats['PR(>F)']['C(sex):C(children)'] < 0.001)
# print(stats['PR(>F)']['C(sex):C(smoker)'] < 0.001)
# print(stats['PR(>F)']['C(sex):C(region)'] < 0.001)
stats



True
False
False
False
True


KeyError: 'C(region)'