## Ch. 3 - Q10

In [1]:
import ISLP
import pandas as pd
from sklearn.linear_model import LinearRegression
import statsmodels.api as sm

In [None]:
# Load the Carseats data set through ISLP's data loader
Carseats = ISLP.load_data("Carseats")
# Preview the first rows (head() shows 5 by default) to check the available columns
Carseats.head()

Unnamed: 0,Sales,CompPrice,Income,Advertising,Population,Price,ShelveLoc,Age,Education,Urban,US
0,9.5,138,73,11,276,120,Bad,42,17,Yes,Yes
1,11.22,111,48,16,260,83,Good,65,10,Yes,Yes
2,10.06,113,35,10,269,80,Medium,59,12,Yes,Yes
3,7.4,117,100,4,466,97,Medium,55,14,Yes,Yes
4,4.15,141,64,3,340,128,Bad,38,13,Yes,No


In [None]:
# Assign design matrix and target vector.
# Predictors: Price plus the two Yes/No store attributes, encoded as 0/1 dummies.
X = Carseats[['Price', 'Urban', 'US']].copy()
binary_dummies = X[['Urban', 'US']].apply(lambda col: col.map({'Yes': 1, 'No': 0}))

# map() turns any label other than 'Yes'/'No' into NaN; fail loudly here instead of
# passing missing values on to the regressions below.
if binary_dummies.isna().any().any():
    raise ValueError("Urban/US contain values other than 'Yes'/'No'")

# Cast to a plain integer dtype so the design matrix is purely numeric.
# NOTE(review): if Urban/US are loaded as pandas categoricals, map() returns a
# categorical result rather than integers -- the explicit cast covers that case.
X[['Urban', 'US']] = binary_dummies.astype(int)
y = Carseats['Sales']

### Part (a)

In [4]:
# Fit the linear regression of Sales on Price, Urban and US.
# fit() returns the estimator itself, so construction and fitting can be chained.
model = LinearRegression().fit(X, y)

# Tabulate the intercept followed by one coefficient per predictor column
pd.DataFrame({
    'Variable': ['Intercept', *X.columns],
    'Coefficient': [model.intercept_, *model.coef_],
})

Unnamed: 0,Variable,Coefficient
0,Intercept,13.043469
1,Price,-0.054459
2,Urban,-0.021916
3,US,1.200573


### Part (b)

*Note: `Sales` is measured in thousands of units, so each coefficient below is multiplied by 1,000 to express it in units sold.*

* The coefficient for "Price" means that, on average, a \$1 increase in price is associated with a decrease in sales of about 54.46 units, holding all other predictors fixed.

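* The coefficient for "Urban" means that, on average, sales in urban locations are 21.92 units lower than in rural locations, keeping all other factors the same. However, this difference is not statistically significant (p ≈ 0.94 in the OLS fit further down), so the data provide no real evidence that `Urban` is related to sales.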

* The coefficient for "US" means that, on average, sales in US stores are 1,200.57 units higher than in non-US stores, assuming all other factors remain unchanged.

### Part (c)

In [5]:
# Write the fitted model as an equation, every number rounded to two decimals.
# Each predictor contributes one signed term: " + c * name" for non-negative
# coefficients, " - |c| * name" for negative ones.
terms = [
    f" + {coef:.2f} * {col}" if coef >= 0 else f" - {-coef:.2f} * {col}"
    for coef, col in zip(model.coef_, X.columns)
]
equation = f"Sales = {model.intercept_:.2f}" + "".join(terms)
print(equation)

Sales = 13.04 - 0.05 * Price - 0.02 * Urban + 1.20 * US


In [8]:
# statsmodels' OLS does not include an intercept by default, so add a constant
# column to the predictors before fitting.
X_with_intercept = sm.add_constant(X)
model_sm = sm.OLS(endog=y, exog=X_with_intercept).fit()

# p-value of the t-test on each coefficient (intercept included)
model_sm.pvalues

const    3.626602e-62
Price    1.609917e-22
Urban    9.357389e-01
US       4.860245e-06
dtype: float64

In [10]:
# Print the full regression summary: fit statistics plus the coefficient table
# (estimates, standard errors, t-statistics, p-values, confidence intervals)
print(model_sm.summary())

                            OLS Regression Results                            
Dep. Variable:                  Sales   R-squared:                       0.239
Model:                            OLS   Adj. R-squared:                  0.234
Method:                 Least Squares   F-statistic:                     41.52
Date:                Mon, 10 Feb 2025   Prob (F-statistic):           2.39e-23
Time:                        17:37:14   Log-Likelihood:                -927.66
No. Observations:                 400   AIC:                             1863.
Df Residuals:                     396   BIC:                             1879.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         13.0435      0.651     20.036      0.0