<a href="https://colab.research.google.com/github/francji1/01RAD/blob/main/code/01RAD_Ex07.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import seaborn as sns
import statsmodels.api as sm
import statsmodels.formula.api as smf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import itertools

from scipy import stats

from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.preprocessing import PolynomialFeatures


## Stepwise regression completion from the last exercise

In [None]:
# Fetch the seaborn "mpg" dataset, discard rows with missing values and
# remove the redundant "name" column in a single chained expression
data = (
    sns.load_dataset('mpg')
    .dropna()
    .drop(columns=['name'])
)

# Preview the first rows of the cleaned frame
data.head()

Fit a model with all features and their second-order (pairwise) interactions.

In [None]:
# Define interaction terms in the formula:
# in formula syntax, (a + b + ...)**2 expands to every main effect plus
# every pairwise interaction a:b (no squared terms)
formula = 'mpg ~ (cylinders + displacement + horsepower + weight + acceleration + model_year)**2'

# Fit an OLS model with interactions
model = smf.ols(formula, data=data).fit()

# Display model summary
print(model.summary())

In [None]:
# Pull the per-coefficient t statistics and p-values from the fitted model
t_scores, p_values = model.tvalues, model.pvalues

# One row per coefficient, ordered from the smallest to the largest p-value
significant_features = pd.DataFrame(
    {
        'Feature': t_scores.index,
        't-Score': t_scores.values,
        'p-Value': p_values.values,
    }
).sort_values('p-Value')

# Render the table (it lists every coefficient, most significant first)
from IPython.display import display
display(significant_features)

### Discussion: how useful is **SelectKBest**?

In [None]:
# Feature screening with SelectKBest and the f_regression score
# (each feature is scored on its own; this is a filter, not a stepwise search)
# Prepare data for scikit-learn
# Extract predictors and response
X = data[['cylinders', 'displacement', 'horsepower', 'weight', 'acceleration', 'model_year']]
y = data['mpg']

# Generate polynomial and interaction terms
# (degree=2 with interaction_only=False also produces the squared terms;
# include_bias=False leaves out the constant column)
poly = PolynomialFeatures(degree=2, interaction_only=False, include_bias=False)
X_poly = poly.fit_transform(X)

# Get feature names for the expanded columns
feature_names = poly.get_feature_names_out(input_features=X.columns)

# Use SelectKBest with F-regression
selector = SelectKBest(score_func=f_regression, k=8)  # 'k' is the number of top-scoring features to keep
X_selected = selector.fit_transform(X_poly, y)

# Get selected features and their scores
# (get_support() is the boolean mask of the k retained columns)
selected_features = feature_names[selector.get_support()]
scores = selector.scores_[selector.get_support()]

# Collect the selected features, sorted by decreasing F-score
# NOTE: this rebinds `significant_features`, replacing any table previously
# stored under that name
significant_features = pd.DataFrame({
    'Feature': selected_features,
    'F-Score': scores
}).sort_values(by='F-Score', ascending=False)

# Display the selected features with their F-scores
from IPython.display import display
display(significant_features)


In [None]:
# Fit a simple OLS model with weight as the only predictor (no interactions)
model_weight = smf.ols('mpg ~ (weight)', data=data).fit()

# Display model summary
print(model_weight.summary())

# Information Criteria: AIC and BIC


## AIC: Akaike Information Criterion

The **AIC** is defined as:

$$
\text{AIC} = -2 \ln(\hat{L}) + 2k
$$

- $ \ln(\hat{L}) $: The log-likelihood of the model.
- $ k $: The number of estimated parameters in the model (including the intercept).

The AIC penalizes model complexity by adding $ 2k $. Lower AIC values indicate a better trade-off between model fit and complexity.

---

## BIC: Bayesian Information Criterion

The **BIC** is defined as:

$$
\text{BIC} = -2 \ln(\hat{L}) + k \ln(n)
$$

- $ \ln(\hat{L}) $: The log-likelihood of the model.
- $ k $: The number of estimated parameters.
- $ n $: The number of observations in the dataset.

The BIC penalizes complexity more heavily than the AIC whenever $ \ln(n) > 2 $ (i.e. for $ n \geq 8 $), and its penalty term $ k \ln(n) $ grows with the size of the dataset.

---

## Key Differences Between AIC and BIC

| Criterion | Penalizes Complexity | Suitable When |
|-----------|-----------------------|---------------|
| **AIC**   | $ 2k $             | predictive models |
| **BIC**   | $ k \ln(n) $       | simpler, interpretable models with fewer predictors |

---



In [None]:
# Function to generate interaction and polynomial terms
def generate_interaction_terms(predictors):
    """
    Expand predictor names into main-effect, interaction and quadratic terms.

    Args:
    - predictors: Iterable of column names (list, tuple, generator, ...).

    Returns:
    - A new list of formula terms: the original names, then one "a:b" term
      for every unordered pair (in input order), then one "I(a**2)" term
      for every name.
    """
    # Materialise once: the names are traversed three times below, and
    # concatenating a tuple with a list would raise TypeError.
    base_terms = list(predictors)
    interaction_terms = [f"{a}:{b}" for a, b in itertools.combinations(base_terms, 2)]
    quadratic_terms = [f"I({var}**2)" for var in base_terms]
    return base_terms + interaction_terms + quadratic_terms

# Modified stepwise selection function
def stepwise_selection(data, response, predictors):
    """
    Trace a greedy forward-selection path and track AIC and BIC along it.

    Starting from no predictors, every step fits one OLS model per remaining
    candidate term, adds the candidate whose model has the lowest AIC, and
    repeats until all terms have been added (there is no early stopping).
    BIC is only evaluated on the models that lie on this AIC-driven path.

    Args:
    - data: DataFrame containing the dataset.
    - response: The dependent variable.
    - predictors: Iterable of candidate formula terms (independent variables).
      Duplicates are ignored; the given order is used to break AIC ties.

    Returns:
    - aic_values: Running minimum of the AIC after each step.
    - bic_values: Running minimum of the BIC after each step.
    - best_aic_model: Fitted model with the lowest AIC on the path
      (None if there are no predictors).
    - best_bic_model: Fitted model with the lowest BIC on the path
      (None if there are no predictors).
    """
    selected_predictors = []  # Start with no predictors
    # Keep the caller's order (minus duplicates) rather than using a set:
    # string hashing is randomised per process, so iterating a set would make
    # the candidate order, and hence tie-breaking, differ between runs.
    remaining_predictors = list(dict.fromkeys(predictors))
    current_aic = float('inf')
    current_bic = float('inf')

    aic_values = []
    bic_values = []
    best_aic_model = None
    best_bic_model = None

    while remaining_predictors:
        # Fit the current selection extended by each remaining candidate
        scores_with_candidates = []
        for candidate in remaining_predictors:
            formula = f"{response} ~ {' + '.join(selected_predictors + [candidate])}"
            fitted = smf.ols(formula, data=data).fit()
            scores_with_candidates.append((fitted.aic, fitted.bic, candidate, fitted))

        # The lowest AIC wins the step; min() keeps the first of any ties
        best_aic, best_bic, best_candidate, best_model = min(
            scores_with_candidates, key=lambda entry: entry[0]
        )

        # Remember the best models seen so far along the path
        if best_aic < current_aic:
            current_aic = best_aic
            best_aic_model = best_model
        if best_bic < current_bic:
            current_bic = best_bic
            best_bic_model = best_model

        # Record current minimum AIC/BIC
        aic_values.append(current_aic)
        bic_values.append(current_bic)

        # Add the best candidate to selected predictors and remove from remaining
        selected_predictors.append(best_candidate)
        remaining_predictors.remove(best_candidate)

    return aic_values, bic_values, best_aic_model, best_bic_model


In [None]:
# Reload the mpg data: complete rows only, without the redundant "name" column
data = sns.load_dataset('mpg').dropna().drop(columns=['name'])

# Response variable and the six base predictors
response = 'mpg'
predictors = [
    'cylinders',
    'displacement',
    'horsepower',
    'weight',
    'acceleration',
    'model_year',
]

# Expand the base predictors into main effects, pairwise interactions and squares
expanded_predictors = generate_interaction_terms(predictors)

# Large reference model that contains every expanded term at once
large_formula = response + ' ~ ' + ' + '.join(expanded_predictors)
large_model = smf.ols(large_formula, data=data).fit()
print(large_model.summary())


In [None]:
# Run stepwise selection with expanded predictors
aic_values, bic_values, best_aic_model, best_bic_model = stepwise_selection(data, response, expanded_predictors)

# Print the minimum AIC and BIC and their corresponding models
# (the "Selected Predictors" headings are followed by the full regression
# summary of the corresponding model, not just a list of names)
print("Minimum AIC:", min(aic_values))
print("Selected Predictors for AIC Model:")
print(best_aic_model.summary())

print("\nMinimum BIC:", min(bic_values))
print("Selected Predictors for BIC Model:")
print(best_bic_model.summary())


In [None]:
# Plot AIC and BIC values
plt.figure(figsize=(10, 6))
# One value is recorded per selection step and each step adds one term,
# so the x-axis (number of terms in the model) starts at 1
x = np.arange(1, len(aic_values) + 1)
plt.plot(x, aic_values, marker='o', label='AIC')
plt.plot(x, bic_values, marker='s', label='BIC')
# Mark the first step that attains the smallest criterion value
# (np.argmin is 0-based, hence the + 1)
plt.axvline(np.argmin(aic_values) + 1, color='blue', linestyle='--', label='Min AIC')
plt.axvline(np.argmin(bic_values) + 1, color='orange', linestyle='--', label='Min BIC')
plt.xlabel('Number of Features')
plt.ylabel('Criterion Value')
plt.title('Stepwise Selection: AIC and BIC over Number of Features (Including Interactions)')
plt.legend()
plt.show()

## Transformations in Linear regression

In [None]:
# Fit the initial model (untransformed mpg on three predictors)
# NOTE: this rebinds the notebook-level name `model`
model = smf.ols('mpg ~ weight + horsepower + displacement', data=data).fit()

# Plot residuals vs. fitted values to check for heteroscedasticity
residuals = model.resid
fitted = model.fittedvalues

plt.scatter(fitted, residuals)
# Zero reference line for the residuals
plt.axhline(0, color='gray', linestyle='--')
plt.xlabel("Fitted Values")
plt.ylabel("Residuals")
plt.title("Residuals vs. Fitted Values (Checking for Heteroscedasticity)")
plt.show()


# Logarithmic Transformation of the Response



When the response variable $Y$ is transformed using the natural logarithm, the regression model changes from:

$$
Y = \beta_0 + \beta_1 X_1 + \beta_2 X_2 + \ldots + \epsilon
$$

to:

$$
\ln(Y) = \beta_0 + \beta_1 X_1 + \beta_2 X_2 + \ldots + \epsilon
$$


The logarithmic transformation is particularly useful when there is:
- Non-linearity between the predictors and the response.
- Heteroscedasticity (non-constant variance of residuals).
- Skewness in the response distribution.
- A **multiplicative** rather than **additive** relationship between the predictors and the response.
---



## Example

Suppose you're modeling the relationship between `mpg` ($Y$) and `weight` of the car ($X$):

1. **Additive Error**: `mpg` changes by a fixed amount for each additional unit of `weight`.
   $$
   \text{mpg} = \beta_0 + \beta_1 \cdot \text{weight} + \epsilon
   $$

2. **Multiplicative Error**: `mpg` changes by a fixed percentage for each additional unit of `weight`.
   $$
   \ln(\text{mpg}) = \beta_0 + \beta_1 \cdot \text{weight} + \epsilon
   $$

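In the second model, $\beta_1$ represents the approximate **relative change in `mpg`** per additional unit of `weight`: `mpg` changes by about $100 \cdot \beta_1\,\%$ (exactly, by the factor $e^{\beta_1}$).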




In [None]:
# Add log-transformed mpg to the dataset (natural logarithm, stored as a new column)
data['log_mpg'] = np.log(data['mpg'])

# Fit the model with log-transformed mpg (same predictors as the untransformed fit)
model_log = smf.ols('log_mpg ~ weight + horsepower + displacement', data=data).fit()

# Extract residuals and fitted values (both on the log scale)
residuals_log = model_log.resid
fitted_log = model_log.fittedvalues

# Plot residuals vs. fitted values for log-transformed response
plt.figure(figsize=(8, 6))
plt.scatter(fitted_log, residuals_log, alpha=0.7)
# Zero reference line for the residuals
plt.axhline(0, color='gray', linestyle='--')
plt.xlabel("Fitted Values (Log MPG)")
plt.ylabel("Residuals")
plt.title("Residuals vs. Fitted Values (Log Transformation)")
plt.show()


# Box-Cox Transformation

The **Box-Cox transformation** is a statistical technique used to stabilize variance and make data more normally distributed. It is particularly useful when the response variable exhibits non-constant variance (heteroscedasticity) or skewness.

---

## Definition of the Box-Cox Transformation

The Box-Cox transformation is defined as:

$$
Y(\lambda) =
\begin{cases}
\frac{Y^\lambda - 1}{\lambda}, & \text{if } \lambda \neq 0 \\
\ln(Y), & \text{if } \lambda = 0
\end{cases}
$$

- $Y$: The response variable (must be positive).
- $\lambda$: The transformation parameter.

---

The  Box-Cox transformation is particularly useful to:
-  **Stabilize Variance**: The transformation reduces heteroscedasticity, ensuring that residual variance remains constant across the range of predicted values.
- **Normalize the Data**: By finding the optimal $\lambda$, the transformation makes the distribution of the response variable closer to normal, which is often an assumption of regression models.

---

## Selecting the Optimal Lambda

The optimal value of $\lambda$ is typically chosen to maximize the log-likelihood function. For each candidate $\lambda$, the likelihood is computed, and the value that maximizes the likelihood is selected as the optimal $\lambda$.

---



## Log-Likelihood Function for Box-Cox Transformation

In the context of the Box-Cox transformation, the likelihood function assumes that the transformed response variable $Y(\lambda)$ follows a normal distribution:

$$
L(\lambda) = \prod_{i=1}^n \frac{1}{\sqrt{2\pi \sigma^2}} \exp\left(-\frac{(Y_i(\lambda) - \hat{Y}_i(\lambda))^2}{2\sigma^2}\right),
$$

where:
- $Y_i(\lambda)$ is the transformed response for observation $i$,
- $ \hat{Y}_i(\lambda) = X_i \hat{\beta} $ is the predicted value based on the transformed response,
- $ \sigma^2 $ is the variance of the residuals.

---

### Taking the Natural Logarithm of the Likelihood Function

The natural logarithm of the likelihood function becomes:

$$
\ln L(\lambda) = \sum_{i=1}^n \ln \left( \frac{1}{\sqrt{2\pi \sigma^2}} \exp\left(-\frac{(Y_i(\lambda) - \hat{Y}_i(\lambda))^2}{2\sigma^2}\right) \right).
$$

Expanding the logarithm and simplifying:


$$
\ln L(\lambda) = -\frac{n}{2} \ln(2\pi) - \frac{n}{2} \ln(\sigma^2) - \frac{1}{2\sigma^2} \sum_{i=1}^n (Y_i(\lambda) - \hat{Y}_i(\lambda))^2.
$$


Define the sum of squared residuals in the transformed space as

$$
SSE(\lambda) = \sum_{i=1}^n (Y_i(\lambda) - \hat{Y}_i(\lambda))^2.
$$

and

$$
\sigma^2 = \frac{SSE(\lambda)}{n},
$$

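the log-likelihood simplifies, up to an additive constant that does not depend on $\lambda$ and after adding the log-Jacobian of the transformation, $(\lambda - 1) \sum_{i=1}^n \ln(Y_i)$ (needed so that different values of $\lambda$ are compared on the scale of the original data), to: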

$$
\ln L(\lambda) = -\frac{n}{2} \ln\left(\frac{1}{n} SSE(\lambda)\right) + (\lambda - 1) \sum_{i=1}^n \ln(Y_i).
$$

---

To find the optimal $ \lambda $, maximize $\ln L(\lambda) $ over a range of candidate $ \lambda $ values.


In [None]:
# Box-Cox needs a strictly positive response. mpg is passed unshifted here;
# if it could contain zero or negative values, a small positive offset
# (e.g. data['mpg'] + 0.01) would have to be added first.
# Called without a fixed lambda, stats.boxcox returns the transformed values
# together with the fitted lambda.
data['mpg_bc'], fitted_lambda = stats.boxcox(data['mpg'])
print(f"Optimal Lambda for Box-Cox: {fitted_lambda}")

# Fit the model with Box-Cox transformed mpg
model_boxcox = smf.ols('mpg_bc ~ weight + horsepower + displacement', data=data).fit()

# Extract residuals and fitted values
residuals_boxcox = model_boxcox.resid
fitted_boxcox = model_boxcox.fittedvalues

# Plot residuals vs. fitted values
plt.figure(figsize=(8, 6))
plt.scatter(fitted_boxcox, residuals_boxcox, alpha=0.7)
plt.axhline(0, color='gray', linestyle='--')
plt.xlabel("Fitted Values (Box-Cox Transformed MPG)")
plt.ylabel("Residuals")
plt.title("Residuals vs. Fitted Values (Box-Cox Transformation)")
plt.show()
# Interpretation hint: compare the printed lambda with 0; a value close to 0
# would imply that a plain log transformation is adequate


In [None]:
from mpl_toolkits.axes_grid1.inset_locator import inset_axes

# Optimal lambda together with its confidence interval (alpha=0.05 -> 95% CI)
_, lmbda_optimal, (lmbda_ci_lower, lmbda_ci_upper) = stats.boxcox(data['mpg'], alpha=0.05)

# Generate log-likelihood values for a range of lambda
lmbdas = np.linspace(-2, 2, 100)
llf = [stats.boxcox_llf(lmbda, data['mpg']) for lmbda in lmbdas]

# Log-likelihood attained at the optimum; computed once so that the reference
# line and its legend entry are guaranteed to agree
llf_optimal = stats.boxcox_llf(lmbda_optimal, data['mpg'])

# Plot the log-likelihood as a function of lambda with the confidence interval
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(lmbdas, llf, 'b.-', label='Log-Likelihood')
# Label this line with the log-likelihood value itself; lambda has its own
# legend entry on the vertical line below
ax.axhline(llf_optimal, color='r', linestyle='-', label=f'Optimal LL: {llf_optimal:.2f}')
ax.axvline(lmbda_optimal, color='r', linestyle='--', label=f'Optimal Lambda: {lmbda_optimal:.2f}')
ax.axvline(lmbda_ci_lower, color='g', linestyle='--', label=f'CI Lower: {lmbda_ci_lower:.2f}')
ax.axvline(lmbda_ci_upper, color='g', linestyle='--', label=f'CI Upper: {lmbda_ci_upper:.2f}')
ax.set_xlabel('Lambda parameter')
ax.set_ylabel('Box-Cox log-likelihood')
ax.legend()

# Insert QQ-plots for selected lambda values
selected_lambdas = [-1, lmbda_optimal, 1]  # Include the optimal lambda and other extremes
locations = ['lower left', 'center', 'lower right']  # Locations for QQ-plots

for lmbda, loc in zip(selected_lambdas, locations):
    # With a fixed lmbda, stats.boxcox returns only the transformed data
    transformed_data = stats.boxcox(data['mpg'], lmbda=lmbda)
    # NOTE: the third fit value from probplot is the correlation coefficient r
    # (not r squared); the name is kept unchanged and the value is not used here
    (osm, osr), (slope, intercept, r_sq) = stats.probplot(transformed_data, dist="norm")
    ax_inset = inset_axes(ax, width="20%", height="20%", loc=loc)
    ax_inset.plot(osm, osr, 'c.', label='Data')
    ax_inset.plot(osm, slope * osm + intercept, 'k-', label='Fit')
    ax_inset.set_title(r'$\lambda=%1.2f$' % lmbda)
    ax_inset.set_xticks([])
    ax_inset.set_yticks([])

plt.tight_layout()
plt.show()

# Fit the model with Box-Cox transformed mpg
# (uses the 'mpg_bc' column, which must already exist in `data`)
model_boxcox = smf.ols('mpg_bc ~ weight + horsepower + displacement', data=data).fit()

# Extract residuals and fitted values
residuals_boxcox = model_boxcox.resid
fitted_boxcox = model_boxcox.fittedvalues

# Print the optimal lambda and its confidence interval
print(f"Optimal Lambda: {lmbda_optimal}")
print(f"95% Confidence Interval for Lambda: ({lmbda_ci_lower}, {lmbda_ci_upper})")


In [None]:
# Square Root Transformation: store sqrt(mpg) as a new column and refit
data['sqrt_mpg'] = np.sqrt(data['mpg'])
model_sqrt = smf.ols('sqrt_mpg ~ weight + horsepower + displacement', data=data).fit()

# Reciprocal Transformation: store 1/mpg as a new column and refit
data['reciprocal_mpg'] = 1 / data['mpg']
model_reciprocal = smf.ols('reciprocal_mpg ~ weight + horsepower + displacement', data=data).fit()


In [None]:
# Display the DataFrame to inspect the transformed-response columns added so far
data

In [None]:
# Re-Create transformed variables
# (so the comparison below does not depend on columns created earlier)
data['log_mpg'] = np.log(data['mpg'])
data['sqrt_mpg'] = np.sqrt(data['mpg'])
data['mpg_bc'], fitted_lambda = stats.boxcox(data['mpg'])

# Fit models with different transformations
# (identical predictors; only the response differs)
model_original = smf.ols('mpg ~ weight + horsepower + displacement', data=data).fit()
model_log = smf.ols('log_mpg ~ weight + horsepower + displacement', data=data).fit()
model_sqrt = smf.ols('sqrt_mpg ~ weight + horsepower + displacement', data=data).fit()
model_boxcox = smf.ols('mpg_bc ~ weight + horsepower + displacement', data=data).fit()

# Calculate studentized residuals for each model
# (internally studentized; being scaled, they are comparable across responses)
residuals_original = model_original.get_influence().resid_studentized_internal
residuals_log = model_log.get_influence().resid_studentized_internal
residuals_sqrt = model_sqrt.get_influence().resid_studentized_internal
residuals_boxcox = model_boxcox.get_influence().resid_studentized_internal

# Combine residuals in a DataFrame for easy plotting
# (one column per transformation; the column names are reused as plot labels)
residuals_df = pd.DataFrame({
    'Original': residuals_original,
    'Log': residuals_log,
    'Square Root': residuals_sqrt,
    'Box-Cox': residuals_boxcox
})


In [None]:
# Draw the studentized residuals of each transformation in its own panel
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(12, 10))
axes = axes.flatten()

for i, col in enumerate(residuals_df.columns):
    panel = axes[i]
    # Markers only (no connecting line), residual against observation index
    panel.plot(residuals_df.index, residuals_df[col], marker='o', linestyle='', alpha=0.5)
    panel.axhline(0, color='gray', linestyle='--')
    panel.set(
        title=f"Studentized Residuals: {col} Transformation",
        xlabel="Index",
        ylabel="Studentized Residuals",
    )

plt.tight_layout()
plt.show()


In [None]:

# Overlay the studentized residuals of every transformation in one figure
plt.figure(figsize=(12, 6))
for col, series in residuals_df.items():
    # Markers only; the column name becomes the legend entry
    plt.plot(series.index, series, marker='o', linestyle='', alpha=0.5, label=col)
plt.axhline(0, color='gray', linestyle='--')
plt.xlabel("Index")
plt.ylabel("Studentized Residuals")
plt.title("Studentized Residuals for Different Transformations")
plt.legend()
plt.show()


In [None]:

# Normal QQ-plot of the studentized residuals, one panel per transformation
fig, axes = plt.subplots(2, 2, figsize=(12, 10))
for i, col in enumerate(residuals_df.columns):
    # Fill the 2x2 grid row by row
    row, column = divmod(i, 2)
    sm.qqplot(residuals_df[col], line='45', ax=axes[row, column])
    axes[row, column].set_title(f"QQ-plot: {col} Transformation")

plt.tight_layout()
plt.show()


In [None]:
# Histograms of studentized residuals, one panel per transformation.
# legend=False hides the per-panel legend, so every panel gets its column
# name as title; otherwise the four histograms could not be told apart.
residuals_df.plot(
    kind='hist',
    bins=20,
    alpha=0.5,
    subplots=True,
    layout=(2, 2),
    figsize=(12, 10),
    legend=False,
    title=list(residuals_df.columns),
)
plt.suptitle("Histograms of Studentized Residuals for Different Transformations")
plt.show()


## Artificially generated dataset

### Simple linear model

In [None]:
# Set seed for reproducibility
# (seeds NumPy's global random state, which the np.random.* calls draw from)
np.random.seed(42)


In [None]:
# --- Case 1: Y depends on X^2 ---
# Generate data: 100 evenly spaced X values on [0, 10] and a quadratic signal
# with additive Gaussian noise (mean 0, standard deviation 5)
X = np.linspace(0, 10, 100)
Y1 = X**2 + np.random.normal(0, 5, size=100)
data1 = pd.DataFrame({'X': X, 'Y': Y1})

# Model Y ~ X (incorrect): linear in X although the signal is quadratic
model1_incorrect = smf.ols('Y ~ X', data=data1).fit()

# Model Y ~ X^2 (correct): regress on a precomputed squared column
data1['X2'] = X**2
model1_correct = smf.ols('Y ~ X2', data=data1).fit()


In [None]:
# --- Case 2: Y depends on sqrt(X) ---
# Generate data: reuses the existing X grid (it is not redefined here);
# square-root signal with additive Gaussian noise (mean 0, standard deviation 0.1)
Y2 = np.sqrt(X) + np.random.normal(0, 0.1, size=100)
data2 = pd.DataFrame({'X': X, 'Y': Y2})

# Model Y ~ X (incorrect): linear in X although the signal is sqrt(X)
model2_incorrect = smf.ols('Y ~ X', data=data2).fit()

# Model Y ~ sqrt(X) (correct): regress on a precomputed square-root column
data2['sqrt_X'] = np.sqrt(X)
model2_correct = smf.ols('Y ~ sqrt_X', data=data2).fit()


In [None]:
# Case 1: Y ~ X^2 vs Y ~ X
# Left panel of a 1x2 layout: raw data with the fitted values of both models
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.scatter(data1['X'], data1['Y'], label='True Data', alpha=0.6)
plt.plot(data1['X'], model1_incorrect.fittedvalues, color='red', label='Y ~ X (Incorrect)')
plt.plot(data1['X'], model1_correct.fittedvalues, color='green', label='Y ~ X^2 (Correct)')
plt.xlabel('X')
plt.ylabel('Y')
plt.title('Case 1: True Relationship Y ~ X^2')
plt.legend()
# NOTE(review): only the left half of the 1x2 grid is filled and there is no
# plt.show() here. If the right-hand panel is drawn in a separately executed
# notebook cell, the two panels may end up in different figures - confirm.


In [None]:
# Case 2: Y ~ sqrt(X) vs Y ~ X
# Right panel of a 1x2 layout: raw data with the fitted values of both models
# NOTE(review): no figure is created here; plt.subplot(1, 2, 2) draws into the
# current pyplot figure. When this runs as a separate notebook cell that is
# presumably a new default-sized figure with an empty left half - confirm.
plt.subplot(1, 2, 2)
plt.scatter(data2['X'], data2['Y'], label='True Data', alpha=0.6)
plt.plot(data2['X'], model2_incorrect.fittedvalues, color='red', label='Y ~ X (Incorrect)')
plt.plot(data2['X'], model2_correct.fittedvalues, color='green', label='Y ~ sqrt(X) (Correct)')
plt.xlabel('X')
plt.ylabel('Y')
plt.title('Case 2: True Relationship Y ~ sqrt(X)')
plt.legend()

plt.tight_layout()
plt.show()


In [None]:
# --- Summaries ---
# Each heading and its regression table are emitted by one print call;
# sep="\n" places the table on the line after the heading.

# Case 1: quadratic truth
print("Case 1: Y ~ X^2 vs Y ~ X")
print("Incorrect Model (Y ~ X):", model1_incorrect.summary(), sep="\n")
print("\nCorrect Model (Y ~ X^2):", model1_correct.summary(), sep="\n")

# Case 2: square-root truth
print("\nCase 2: Y ~ sqrt(X) vs Y ~ X")
print("Incorrect Model (Y ~ X):", model2_incorrect.summary(), sep="\n")
print("\nCorrect Model (Y ~ sqrt(X)):", model2_correct.summary(), sep="\n")


In [None]:
import statsmodels.graphics.api as smg

# Function to plot partial regression and partial residual plots using smf models
def plot_partial_plots_smf(model, data, predictor, case_title):
    """
    Show a partial regression plot and a partial residual plot side by side.

    Args:
    - model: Fitted statsmodels OLS results object (formula interface).
    - data: DataFrame the model was fitted on.
    - predictor: Name of the regressor to inspect.
    - case_title: Text appended to both panel titles.

    Assumes the model's regressors are plain column names of `data`, so that
    they can be re-used as formula terms.
    """
    fig, axes = plt.subplots(1, 2, figsize=(14, 6))

    # Response and the remaining regressors are read from the fitted model.
    # The inspected predictor must NOT be among the adjustment variables:
    # partialling a variable out of itself leaves only numerical noise.
    endog_name = model.model.endog_names
    other_terms = [
        name for name in model.model.exog_names
        if name not in ('Intercept', predictor)
    ]
    # "1" requests an intercept-only adjustment when nothing else is in the model
    exog_others = ' + '.join(other_terms) if other_terms else '1'

    # Partial regression (added-variable) plot
    smg.plot_partregress(endog_name, predictor, exog_others, data=data, ax=axes[0])
    axes[0].set_title(f"Partial Regression Plot ({case_title})")

    # Partial residual plot (CCPR: component and component-plus-residual)
    smg.plot_ccpr(model, predictor, ax=axes[1])
    axes[1].set_title(f"Partial Residual Plot ({case_title})")

    plt.tight_layout()
    plt.show()

# --- Case 1: Y depends on X^2 but modeled as Y ~ X ---
# Diagnostic plots for the misspecified linear fit
print("Case 1: Y depends on X^2 but modeled as Y ~ X")
plot_partial_plots_smf(model1_incorrect, data1, 'X', "Case 1: Y ~ X")

# --- Case 2: Y depends on sqrt(X) but modeled as Y ~ X ---
# Diagnostic plots for the misspecified linear fit
print("Case 2: Y depends on sqrt(X) but modeled as Y ~ X")
plot_partial_plots_smf(model2_incorrect, data2, 'X', "Case 2: Y ~ X")


Add more variables

In [None]:
import statsmodels.graphics.api as smg

# --- Case 1: Y depends on X1^2 ---
# Generate data
# (X2 and X3 are drawn from NumPy's global random state, so the results
# depend on the seed and on the order of the draws)
X1 = np.linspace(0, 10, 100)  # Main variable of interest
X2 = np.random.uniform(0, 10, 100)  # Additional variable
X3 = np.random.normal(5, 2, 100)  # Another additional variable
Y1 = X1**2 + 2*X2 + 3*X3 + np.random.normal(0, 5, size=100)  # True relationship

# Create a DataFrame
# NOTE: rebinds `data1`, replacing any frame previously stored under that name
data1 = pd.DataFrame({'X1': X1, 'X2': X2, 'X3': X3, 'Y': Y1})

# Fit incorrect and correct models
# (incorrect: linear in X1; correct: uses the precomputed squared column)
model1_incorrect = smf.ols('Y ~ X1 + X2 + X3', data=data1).fit()
data1['X1_squared'] = X1**2
model1_correct = smf.ols('Y ~ X1_squared + X2 + X3', data=data1).fit()

# --- Case 2: Y depends on sqrt(X1) ---
# Generate data (same X1, X2, X3 as in Case 1; only the response differs)
Y2 = np.sqrt(X1) + 2*X2 + 3*X3 + np.random.normal(0, 0.1, size=100)  # True relationship

# Create a DataFrame
# NOTE: rebinds `data2`, replacing any frame previously stored under that name
data2 = pd.DataFrame({'X1': X1, 'X2': X2, 'X3': X3, 'Y': Y2})

# Fit incorrect and correct models
# (incorrect: linear in X1; correct: uses the precomputed square-root column)
model2_incorrect = smf.ols('Y ~ X1 + X2 + X3', data=data2).fit()
data2['sqrt_X1'] = np.sqrt(X1)
model2_correct = smf.ols('Y ~ sqrt_X1 + X2 + X3', data=data2).fit()

# --- Function to plot partial regression and partial residual plots ---
def plot_partial_plots_smf(model, data, predictor, case_title):
    """
    Show a partial regression plot and a partial residual plot side by side.

    Args:
    - model: Fitted statsmodels OLS results object (formula interface).
    - data: DataFrame the model was fitted on.
    - predictor: Name of the regressor to inspect.
    - case_title: Text appended to both panel titles.

    Assumes the model's regressors are plain column names of `data`, so that
    they can be re-used as formula terms.
    """
    fig, axes = plt.subplots(1, 2, figsize=(14, 6))

    # Response and the remaining regressors are read from the fitted model.
    # The inspected predictor must NOT be among the adjustment variables:
    # partialling a variable out of itself leaves only numerical noise.
    endog_name = model.model.endog_names
    other_terms = [
        name for name in model.model.exog_names
        if name not in ('Intercept', predictor)
    ]
    # "1" requests an intercept-only adjustment when nothing else is in the model
    exog_others = ' + '.join(other_terms) if other_terms else '1'

    # Partial regression (added-variable) plot
    smg.plot_partregress(endog_name, predictor, exog_others, data=data, ax=axes[0])
    axes[0].set_title(f"Partial Regression Plot ({case_title})")

    # Partial residual plot (CCPR: component and component-plus-residual)
    smg.plot_ccpr(model, predictor, ax=axes[1])
    axes[1].set_title(f"Partial Residual Plot ({case_title})")

    plt.tight_layout()
    plt.show()

# --- Case 1: Y depends on X1^2 but modeled as Y ~ X1 ---
# Diagnostic plots are drawn for the misspecified model only
print("Case 1: Y depends on X1^2 but modeled as Y ~ X1")
plot_partial_plots_smf(model1_incorrect, data1, 'X1', "Case 1: Y ~ X1")
print("Correct model: Partial plots not needed as transformation is applied.")

# --- Case 2: Y depends on sqrt(X1) but modeled as Y ~ X1 ---
# Diagnostic plots are drawn for the misspecified model only
print("Case 2: Y depends on sqrt(X1) but modeled as Y ~ X1")
plot_partial_plots_smf(model2_incorrect, data2, 'X1', "Case 2: Y ~ X1")
print("Correct model: Partial plots not needed as transformation is applied.")
