In [1]:
import numpy as np
import statsmodels.api as sm
import seaborn as sn
import pandas as pd
from itertools import product

# Question 5

In [2]:
# Monte Carlo check of the population OLS coefficients obtained when the
# quadratic model y = a + b*x + c*x**2 is fitted with only an intercept and a
# linear term in x.
SEED = 0
N_SAMPLES = 100000
# Seeded generator so that Restart & Run All reproduces the reported RMSEs.
rng = np.random.default_rng(SEED)

x_normal = rng.standard_normal(size=N_SAMPLES)
# Subtract the Exp(1) mean so this regressor is centred at 0 like the normal one.
x_exp = rng.standard_exponential(size=N_SAMPLES) - 1

# The design matrix [1, x] depends only on x, so build it once per distribution
# instead of once per (a, b, c) combination.
designs = {
    'Normal': np.c_[np.ones(x_normal.shape[0]), x_normal],
    'Exp': np.c_[np.ones(x_exp.shape[0]), x_exp],
}
# E[x^3] of each regressor: 0 for N(0, 1), 2 for the centred Exp(1).
third_moment = {'Normal': 0, 'Exp': 2}

errors = {'Normal': [], 'Exp': []}
for a, b, c in product([-1, 0, 1], repeat=3):
    for dist, x in [['Normal', x_normal], ['Exp', x_exp]]:
        y = a + b * x + c * x ** 2
        ols = sm.OLS(y, designs[dist])
        ols_results = ols.fit()
        params = ols_results.params
        # With E[x] = 0 and E[x^2] = 1 the theoretical coefficients are
        # intercept = a + c * E[x^2] = a + c and slope = b + c * E[x^3].
        expected = np.array([a + c, b + c * third_moment[dist]])
        errors[dist].append(np.square(params - expected))
# RMSE over every (a, b, c) combination and both coefficients.
print('Normal RMSE:', np.sqrt(np.mean(errors['Normal'])))
print('Exp RMSE:', np.sqrt(np.mean(errors['Exp'])))

Normal RMSE: 0.004016624176857915
Exp RMSE: 0.02849942661032183


We can see that the RMSEs for both the normal and the exponential models are close to zero; we therefore conclude that the simulation validates our theoretical calculations.

# Question 6

In [3]:
# Regress tip on total_bill and size, first without and then with their
# product as an additional regressor, and print the fitted coefficients.
tips = pd.get_dummies(sn.load_dataset('tips'))
y = tips[['tip']]
x1 = tips[['total_bill', 'size']]
# Model 2 extends Model 1 with the total_bill * size interaction column.
x2 = x1.assign(interaction=x1['total_bill'].mul(x1['size']))
for name, x in {'Model 1': x1, 'Model 2': x2}.items():
    # The constant column named 'bias' plays the role of the intercept.
    design = x.assign(bias=1)
    ols = sm.OLS(y, design)
    results = ols.fit()
    params = results.params
    print(f'Parameters for {name}:', params, sep='\n')

Parameters for Model 1:
total_bill    0.092713
size          0.192598
bias          0.668945
dtype: float64
Parameters for Model 2:
total_bill     0.081881
size           0.093269
interaction    0.003941
bias           0.918271
dtype: float64


Including the interaction term substantially decreased the coefficient of size (from 0.193 to 0.093) while only mildly decreasing the coefficient of total_bill (from 0.093 to 0.082). We suspect that some of the effect of size is now accounted for by the interaction term. The values of total_bill are on a larger scale than the values of size, which causes the total_bill parameter to have a lower weight. Naturally, the bias term changed slightly.