In [3]:
import numpy as np

def simulate(A=1, B=1, C=10, D=1000):
    """
    Simulate data according to the given model:
    W ~ N(0,1)
    X = W + N(0,B)
    Y = A*X - W + N(0,C)
    """
    W = np.random.normal(0, 1, D)
    X = W + np.random.normal(0, B, D)
    Y = A*X - W + np.random.normal(0, C, D)
    return Y, X, W

def manual_regression_with_W(Y, X, W):
    """
    Manually calculate regression Y ~ X + W and return the t-statistic for X
    """
    # Create design matrix with intercept, X, and W
    n = len(Y)
    X_matrix = np.column_stack([np.ones(n), X, W])
    
    # Calculate coefficients using normal equation: β = (X'X)^(-1)X'Y
    XTX = X_matrix.T @ X_matrix
    XTY = X_matrix.T @ Y
    coefficients = np.linalg.solve(XTX, XTY)
    
    # Calculate residuals
    Y_pred = X_matrix @ coefficients
    residuals = Y - Y_pred
    
    # Calculate residual sum of squares
    RSS = np.sum(residuals**2)
    
    # Degrees of freedom
    df = n - X_matrix.shape[1]  # n - number of parameters
    
    # Mean squared error
    MSE = RSS / df
    
    # Variance-covariance matrix of coefficients
    var_covar_matrix = MSE * np.linalg.inv(XTX)
    
    # Standard error of X coefficient (index 1)
    se_X = np.sqrt(var_covar_matrix[1, 1])
    
    # t-statistic for X coefficient
    t_stat = coefficients[1] / se_X
    
    return t_stat, coefficients, se_X

def calculate_power(A=1, B=1, C=10, D=1000, n_simulations=1000):
    """
    Calculate the probability of detecting a nonzero effect of X on Y
    (t-value > 1.96 in absolute value) through Monte Carlo simulation
    """
    significant_count = 0
    
    for i in range(n_simulations):
        # Generate data
        Y, X, W = simulate(A, B, C, D)
        
        # Run regression
        t_stat, _, _ = manual_regression_with_W(Y, X, W)
        
        # Check if significant (|t| > 1.96)
        if abs(t_stat) > 1.96:
            significant_count += 1
    
    power = significant_count / n_simulations
    return power

# Set random seed for reproducibility
np.random.seed(42)

# Define parameters
A, B, C, D = 1, 1, 10, 1000

# Calculate power for the given parameters
power = calculate_power(A=A, B=B, C=C, D=D, n_simulations=1000)
print(f"Probability of detecting nonzero effect: {power:.1%}")

# Let's also run a single simulation to understand the model better
Y, X, W = simulate(A=A, B=B, C=C, D=D)
t_stat, coefficients, se_X = manual_regression_with_W(Y, X, W)

print(f"\nSample regression results:")
print(f"Intercept coefficient: {coefficients[0]:.3f}")
print(f"X coefficient: {coefficients[1]:.3f}")
print(f"W coefficient: {coefficients[2]:.3f}")
print(f"Standard error of X: {se_X:.3f}")
print(f"t-statistic for X: {t_stat:.3f}")
print(f"Is significant (|t| > 1.96): {abs(t_stat) > 1.96}")

# Data characteristics
print(f"\nData characteristics:")
print(f"Sample size (D): {len(Y)}")
print(f"True effect of X (A): {A}")
print(f"Noise in X (B): {B}")
print(f"Noise in Y (C): {C}")
print(f"Correlation between X and W: {np.corrcoef(X, W)[0,1]:.3f}")
print(f"Standard deviation of X: {np.std(X):.3f}")
print(f"Standard deviation of Y: {np.std(Y):.3f}")

# Run multiple trials to get a more stable estimate
print(f"\nRunning multiple power calculations:")
powers = []
for trial in range(10):
    np.random.seed(42 + trial)
    power_trial = calculate_power(A=A, B=B, C=C, D=D, n_simulations=500)
    powers.append(power_trial)
    print(f"Trial {trial+1}: {power_trial:.1%}")

mean_power = np.mean(powers)
std_power = np.std(powers)
print(f"\nMean power across trials: {mean_power:.1%}")
print(f"Standard deviation: {std_power:.3f}")

# Compare with options
print(f"\nClosest option:")
options = [0.98, 0.88, 0.83, 0.93]
option_labels = ['A (98%)', 'B (88%)', 'C (83%)', 'D (93%)']
differences = [abs(mean_power - opt) for opt in options]
closest_idx = np.argmin(differences)
print(f"Option {option_labels[closest_idx]} - difference: {differences[closest_idx]:.3f}")

# Let's also do a quick theoretical check
print(f"\nTheoretical considerations:")
print(f"- Large sample size (n=1000) increases power")
print(f"- True effect size A=1 is moderate")
print(f"- High noise in Y (C=10) decreases power")
print(f"- Including confounder W is essential for unbiased estimation")

Probability of detecting nonzero effect: 86.6%

Sample regression results:
Intercept coefficient: 0.068
X coefficient: 1.137
W coefficient: -1.833
Standard error of X: 0.308
t-statistic for X: 3.692
Is significant (|t| > 1.96): True

Data characteristics:
Sample size (D): 1000
True effect of X (A): 1
Noise in X (B): 1
Noise in Y (C): 10
Correlation between X and W: 0.712
Standard deviation of X: 1.457
Standard deviation of Y: 10.040

Running multiple power calculations:
Trial 1: 87.4%
Trial 2: 91.8%
Trial 3: 88.2%
Trial 4: 88.0%
Trial 5: 88.4%
Trial 6: 86.4%
Trial 7: 88.2%
Trial 8: 88.8%
Trial 9: 90.0%
Trial 10: 90.6%

Mean power across trials: 88.8%
Standard deviation: 0.015

Closest option:
Option B (88%) - difference: 0.008

Theoretical considerations:
- Large sample size (n=1000) increases power
- True effect size A=1 is moderate
- High noise in Y (C=10) decreases power
- Including confounder W is essential for unbiased estimation


In [4]:
import numpy as np

def simulate(A=1, B=1, C=10, D=1000):
    """
    Simulate data according to the given model:
    W ~ N(0,1)
    X = W + N(0,B)
    Y = A*X - W + N(0,C)
    """
    W = np.random.normal(0, 1, D)
    X = W + np.random.normal(0, B, D)
    Y = A*X - W + np.random.normal(0, C, D)
    return Y, X, W

def manual_regression_with_W(Y, X, W):
    """
    Manually calculate regression Y ~ X + W and return the t-statistic for X
    """
    # Create design matrix with intercept, X, and W
    n = len(Y)
    X_matrix = np.column_stack([np.ones(n), X, W])
    
    # Calculate coefficients using normal equation: β = (X'X)^(-1)X'Y
    XTX = X_matrix.T @ X_matrix
    XTY = X_matrix.T @ Y
    coefficients = np.linalg.solve(XTX, XTY)
    
    # Calculate residuals
    Y_pred = X_matrix @ coefficients
    residuals = Y - Y_pred
    
    # Calculate residual sum of squares
    RSS = np.sum(residuals**2)
    
    # Degrees of freedom
    df = n - X_matrix.shape[1]  # n - number of parameters
    
    # Mean squared error
    MSE = RSS / df
    
    # Variance-covariance matrix of coefficients
    var_covar_matrix = MSE * np.linalg.inv(XTX)
    
    # Standard error of X coefficient (index 1)
    se_X = np.sqrt(var_covar_matrix[1, 1])
    
    # t-statistic for X coefficient
    t_stat = coefficients[1] / se_X
    
    return t_stat, coefficients, se_X

def calculate_power(A=1, B=1, C=10, D=1000, n_simulations=1000):
    """
    Calculate the probability of detecting a nonzero effect of X on Y
    (t-value > 1.96 in absolute value) through Monte Carlo simulation
    """
    significant_count = 0
    
    for i in range(n_simulations):
        # Generate data
        Y, X, W = simulate(A, B, C, D)
        
        # Run regression
        t_stat, _, _ = manual_regression_with_W(Y, X, W)
        
        # Check if significant (|t| > 1.96)
        if abs(t_stat) > 1.96:
            significant_count += 1
    
    power = significant_count / n_simulations
    return power

def calculate_skewness(A=1, B=1, C=10, D=1000, n_simulations=5000):
    """
    Calculate the skewness of the X coefficient estimates
    """
    from scipy.stats import skew
    
    coefficients = []
    
    for i in range(n_simulations):
        # Generate data
        Y, X, W = simulate(A, B, C, D)
        
        # Run regression
        t_stat, coef, _ = manual_regression_with_W(Y, X, W)
        
        # Store the X coefficient (index 1)
        coefficients.append(coef[1])
    
    coefficients = np.array(coefficients)
    
    # Calculate skewness
    skewness = skew(coefficients)
    
    return skewness, coefficients

# Set random seed for reproducibility
np.random.seed(42)

# Define parameters
A, B, C, D = 1, 1, 10, 1000

# Calculate power for the given parameters
power = calculate_power(A=A, B=B, C=C, D=D, n_simulations=1000)
print(f"Probability of detecting nonzero effect: {power:.1%}")

# Let's also run a single simulation to understand the model better
Y, X, W = simulate(A=A, B=B, C=C, D=D)
t_stat, coefficients, se_X = manual_regression_with_W(Y, X, W)

print(f"\nSample regression results:")
print(f"Intercept coefficient: {coefficients[0]:.3f}")
print(f"X coefficient: {coefficients[1]:.3f}")
print(f"W coefficient: {coefficients[2]:.3f}")
print(f"Standard error of X: {se_X:.3f}")
print(f"t-statistic for X: {t_stat:.3f}")
print(f"Is significant (|t| > 1.96): {abs(t_stat) > 1.96}")

# Data characteristics
print(f"\nData characteristics:")
print(f"Sample size (D): {len(Y)}")
print(f"True effect of X (A): {A}")
print(f"Noise in X (B): {B}")
print(f"Noise in Y (C): {C}")
print(f"Correlation between X and W: {np.corrcoef(X, W)[0,1]:.3f}")
print(f"Standard deviation of X: {np.std(X):.3f}")
print(f"Standard deviation of Y: {np.std(Y):.3f}")

# Run multiple trials to get a more stable estimate
print(f"\nRunning multiple power calculations:")
powers = []
for trial in range(10):
    np.random.seed(42 + trial)
    power_trial = calculate_power(A=A, B=B, C=C, D=D, n_simulations=500)
    powers.append(power_trial)
    print(f"Trial {trial+1}: {power_trial:.1%}")

mean_power = np.mean(powers)
std_power = np.std(powers)
print(f"\nMean power across trials: {mean_power:.1%}")
print(f"Standard deviation: {std_power:.3f}")

# Compare with options
print(f"\nClosest option:")
options = [0.98, 0.88, 0.83, 0.93]
option_labels = ['A (98%)', 'B (88%)', 'C (83%)', 'D (93%)']
differences = [abs(mean_power - opt) for opt in options]
closest_idx = np.argmin(differences)
print(f"Option {option_labels[closest_idx]} - difference: {differences[closest_idx]:.3f}")

# Calculate skewness of X coefficient estimates
print(f"\n" + "="*60)
print("QUESTION 2: SKEWNESS OF X COEFFICIENT ESTIMATES")
print("="*60)

np.random.seed(42)
skewness_value, coefficients = calculate_skewness(A=A, B=B, C=C, D=D, n_simulations=5000)

print(f"Skewness of X coefficient estimates: {skewness_value:.3f}")
print(f"Mean of X coefficients: {np.mean(coefficients):.3f}")
print(f"Standard deviation of X coefficients: {np.std(coefficients):.3f}")
print(f"Min coefficient: {np.min(coefficients):.3f}")
print(f"Max coefficient: {np.max(coefficients):.3f}")

# Compare with options
print(f"\nClosest option for skewness:")
skew_options = [-0.15, 0, 0.34, 0.15]
skew_option_labels = ['A (-0.15)', 'B (0)', 'C (0.34)', 'D (0.15)']
skew_differences = [abs(skewness_value - opt) for opt in skew_options]
closest_skew_idx = np.argmin(skew_differences)
print(f"Option {skew_option_labels[closest_skew_idx]} - difference: {skew_differences[closest_skew_idx]:.3f}")

# Let's also check with multiple trials for stability
print(f"\nMultiple skewness calculations:")
skewness_values = []
for trial in range(5):
    np.random.seed(42 + trial)
    skew_trial, _ = calculate_skewness(A=A, B=B, C=C, D=D, n_simulations=3000)
    skewness_values.append(skew_trial)
    print(f"Trial {trial+1}: {skew_trial:.3f}")

mean_skewness = np.mean(skewness_values)
print(f"\nMean skewness across trials: {mean_skewness:.3f}")

# Final comparison
skew_differences_mean = [abs(mean_skewness - opt) for opt in skew_options]
closest_skew_idx_mean = np.argmin(skew_differences_mean)
print(f"Closest option based on mean: {skew_option_labels[closest_skew_idx_mean]} - difference: {skew_differences_mean[closest_skew_idx_mean]:.3f}")

print(f"\nInterpretation:")
if abs(mean_skewness) < 0.1:
    print("- Distribution is approximately symmetric (skewness ≈ 0)")
elif mean_skewness > 0:
    print("- Distribution is right-skewed (positive skew)")
else:
    print("- Distribution is left-skewed (negative skew)")

# Let's also do a quick theoretical check
print(f"\nTheoretical expectation:")
print(f"- OLS estimates are unbiased, so E[β̂] = β = {A}")
print(f"- With large sample size and normal errors, estimates should be approximately normal")
print(f"- Normal distribution has skewness = 0")
print(f"- Small deviations from 0 are expected due to sampling variability")

Probability of detecting nonzero effect: 86.6%

Sample regression results:
Intercept coefficient: 0.068
X coefficient: 1.137
W coefficient: -1.833
Standard error of X: 0.308
t-statistic for X: 3.692
Is significant (|t| > 1.96): True

Data characteristics:
Sample size (D): 1000
True effect of X (A): 1
Noise in X (B): 1
Noise in Y (C): 10
Correlation between X and W: 0.712
Standard deviation of X: 1.457
Standard deviation of Y: 10.040

Running multiple power calculations:
Trial 1: 87.4%
Trial 2: 91.8%
Trial 3: 88.2%
Trial 4: 88.0%
Trial 5: 88.4%
Trial 6: 86.4%
Trial 7: 88.2%
Trial 8: 88.8%
Trial 9: 90.0%
Trial 10: 90.6%

Mean power across trials: 88.8%
Standard deviation: 0.015

Closest option:
Option B (88%) - difference: 0.008

QUESTION 2: SKEWNESS OF X COEFFICIENT ESTIMATES
Skewness of X coefficient estimates: -0.040
Mean of X coefficients: 0.995
Standard deviation of X coefficients: 0.321
Min coefficient: -0.316
Max coefficient: 2.107

Closest option for skewness:
Option B (0) - diff

In [7]:
import numpy as np

def simulate(A=1, B=1, C=10, D=1000):
    """
    Simulate data according to the given model:
    W ~ N(0,1)
    X = W + N(0,B)
    Y = A*X - W + N(0,C)
    """
    W = np.random.normal(0, 1, D)
    X = W + np.random.normal(0, B, D)
    Y = A*X - W + np.random.normal(0, C, D)
    return Y, X, W

def manual_regression_with_W(Y, X, W):
    """
    Manually calculate regression Y ~ X + W and return the t-statistic for X
    """
    # Create design matrix with intercept, X, and W
    n = len(Y)
    X_matrix = np.column_stack([np.ones(n), X, W])
    
    # Calculate coefficients using normal equation: β = (X'X)^(-1)X'Y
    XTX = X_matrix.T @ X_matrix
    XTY = X_matrix.T @ Y
    coefficients = np.linalg.solve(XTX, XTY)
    
    # Calculate residuals
    Y_pred = X_matrix @ coefficients
    residuals = Y - Y_pred
    
    # Calculate residual sum of squares
    RSS = np.sum(residuals**2)
    
    # Degrees of freedom
    df = n - X_matrix.shape[1]  # n - number of parameters
    
    # Mean squared error
    MSE = RSS / df
    
    # Variance-covariance matrix of coefficients
    var_covar_matrix = MSE * np.linalg.inv(XTX)
    
    # Standard error of X coefficient (index 1)
    se_X = np.sqrt(var_covar_matrix[1, 1])
    
    # t-statistic for X coefficient
    t_stat = coefficients[1] / se_X
    
    return t_stat, coefficients, se_X

def find_power_for_B(B_value, A=1, C=10, D=1000, n_simulations=1000):
    """
    Calculate power for a specific value of B
    """
    significant_count = 0
    
    for i in range(n_simulations):
        # Generate data with specific B value
        Y, X, W = simulate(A=A, B=B_value, C=C, D=D)
        
        # Run regression
        t_stat, _, _ = manual_regression_with_W(Y, X, W)
        
        # Check if significant (|t| > 1.96)
        if abs(t_stat) > 1.96:
            significant_count += 1
    
    power = significant_count / n_simulations
    return power

# Question 3: Find B value for 50% power
print("QUESTION 3: FINDING B VALUE FOR 50% POWER")
print("="*60)

# Test the given options
print("Testing the given options:")
B_options = [1.8, 0.6, 0.2, 5.4]
B_option_labels = ['A (1.8)', 'B (0.6)', 'C (0.2)', 'D (5.4)']

np.random.seed(42)
powers_for_options = []

for i, B_val in enumerate(B_options):
    power = find_power_for_B(B_val, A=1, C=10, D=1000, n_simulations=1000)
    powers_for_options.append(power)
    print(f"Option {B_option_labels[i]}: B = {B_val}, Power = {power:.1%}")

# Find closest to 50%
target_power = 0.5
differences_from_50 = [abs(power - target_power) for power in powers_for_options]
closest_idx = np.argmin(differences_from_50)
print(f"\nClosest to 50% power:")
print(f"Option {B_option_labels[closest_idx]} - Power: {powers_for_options[closest_idx]:.1%}, Difference from 50%: {differences_from_50[closest_idx]:.3f}")

# Verify with multiple trials for the best option
print(f"\nVerification with multiple trials for Option {B_option_labels[closest_idx]}:")
verification_powers = []
for trial in range(5):
    np.random.seed(42 + trial)
    power = find_power_for_B(B_options[closest_idx], A=1, C=10, D=1000, n_simulations=800)
    verification_powers.append(power)
    print(f"Trial {trial+1}: {power:.1%}")

mean_verification_power = np.mean(verification_powers)
print(f"Mean power across trials: {mean_verification_power:.1%}")

print(f"\nInterpretation:")
print(f"- As B increases, noise in X increases")
print(f"- More noise in X makes it harder to detect the effect of X on Y")
print(f"- Higher B → Lower power")
print(f"- We need B = {B_options[closest_idx]} to get approximately 50% power")

# Show all results for comparison
print(f"\nSummary of all options:")
for i, (B_val, power) in enumerate(zip(B_options, powers_for_options)):
    print(f"Option {B_option_labels[i]}: B = {B_val:3.1f} → Power = {power:.1%}")

print(f"\nConceptual relationship:")
print(f"- B controls noise in X: X = W + N(0,B)")
print(f"- Larger B → more noise in X → less precise estimates → lower power")
print(f"- From our results: B = 1 gave ~88% power, so we need larger B for 50% power")

QUESTION 3: FINDING B VALUE FOR 50% POWER
Testing the given options:
Option A (1.8): B = 1.8, Power = 100.0%
Option B (0.6): B = 0.6, Power = 46.9%
Option C (0.2): B = 0.2, Power = 9.9%
Option D (5.4): B = 5.4, Power = 100.0%

Closest to 50% power:
Option B (0.6) - Power: 46.9%, Difference from 50%: 0.031

Verification with multiple trials for Option B (0.6):
Trial 1: 46.5%
Trial 2: 48.6%
Trial 3: 48.5%
Trial 4: 48.0%
Trial 5: 45.4%
Mean power across trials: 47.4%

Interpretation:
- As B increases, noise in X increases
- More noise in X makes it harder to detect the effect of X on Y
- Higher B → Lower power
- We need B = 0.6 to get approximately 50% power

Summary of all options:
Option A (1.8): B = 1.8 → Power = 100.0%
Option B (0.6): B = 0.6 → Power = 46.9%
Option C (0.2): B = 0.2 → Power = 9.9%
Option D (5.4): B = 5.4 → Power = 100.0%

Conceptual relationship:
- B controls noise in X: X = W + N(0,B)
- Larger B → more noise in X → less precise estimates → lower power
- From our resul

In [8]:
import numpy as np

def simulate(A=1, B=1, C=10, D=1000):
    """
    Simulate data according to the given model:
    W ~ N(0,1)
    X = W + N(0,B)
    Y = A*X - W + N(0,C)
    """
    W = np.random.normal(0, 1, D)
    X = W + np.random.normal(0, B, D)
    Y = A*X - W + np.random.normal(0, C, D)
    return Y, X, W

def manual_regression_with_W(Y, X, W):
    """
    Manually calculate regression Y ~ X + W and return the t-statistic for X
    """
    # Create design matrix with intercept, X, and W
    n = len(Y)
    X_matrix = np.column_stack([np.ones(n), X, W])
    
    # Calculate coefficients using normal equation: β = (X'X)^(-1)X'Y
    XTX = X_matrix.T @ X_matrix
    XTY = X_matrix.T @ Y
    coefficients = np.linalg.solve(XTX, XTY)
    
    # Calculate residuals
    Y_pred = X_matrix @ coefficients
    residuals = Y - Y_pred
    
    # Calculate residual sum of squares
    RSS = np.sum(residuals**2)
    
    # Degrees of freedom
    df = n - X_matrix.shape[1]  # n - number of parameters
    
    # Mean squared error
    MSE = RSS / df
    
    # Variance-covariance matrix of coefficients
    var_covar_matrix = MSE * np.linalg.inv(XTX)
    
    # Standard error of X coefficient (index 1)
    se_X = np.sqrt(var_covar_matrix[1, 1])
    
    # t-statistic for X coefficient
    t_stat = coefficients[1] / se_X
    
    return t_stat, coefficients, se_X

def find_power_for_A(A_value, B=1, C=10, D=100, n_simulations=1000):
    """
    Calculate power for a specific value of A (effect size)
    """
    significant_count = 0
    
    for i in range(n_simulations):
        # Generate data with specific A value
        Y, X, W = simulate(A=A_value, B=B, C=C, D=D)
        
        # Run regression
        t_stat, _, _ = manual_regression_with_W(Y, X, W)
        
        # Check if significant (|t| > 1.96)
        if abs(t_stat) > 1.96:
            significant_count += 1
    
    power = significant_count / n_simulations
    return power

# Question 4: Find A value for 50% power with smaller sample size
print("QUESTION 4: FINDING A VALUE FOR 50% POWER")
print("Parameters: B=1, C=10, D=100 (smaller sample size)")
print("="*60)

# Test the given options
print("Testing the given options:")
A_options = [4.0, 0.5, 2.0, 1.0]
A_option_labels = ['A (4.0)', 'B (0.5)', 'C (2.0)', 'D (1.0)']

np.random.seed(42)
powers_for_options = []

for i, A_val in enumerate(A_options):
    power = find_power_for_A(A_val, B=1, C=10, D=100, n_simulations=1000)
    powers_for_options.append(power)
    print(f"Option {A_option_labels[i]}: A = {A_val}, Power = {power:.1%}")

# Find closest to 50%
target_power = 0.5
differences_from_50 = [abs(power - target_power) for power in powers_for_options]
closest_idx = np.argmin(differences_from_50)
print(f"\nClosest to 50% power:")
print(f"Option {A_option_labels[closest_idx]} - Power: {powers_for_options[closest_idx]:.1%}, Difference from 50%: {differences_from_50[closest_idx]:.3f}")

# Verify with multiple trials for the best option
print(f"\nVerification with multiple trials for Option {A_option_labels[closest_idx]}:")
verification_powers = []
for trial in range(5):
    np.random.seed(42 + trial)
    power = find_power_for_A(A_options[closest_idx], B=1, C=10, D=100, n_simulations=800)
    verification_powers.append(power)
    print(f"Trial {trial+1}: {power:.1%}")

mean_verification_power = np.mean(verification_powers)
print(f"Mean power across trials: {mean_verification_power:.1%}")

print(f"\nInterpretation:")
print(f"- A is the true effect size (coefficient of X in Y = A*X - W + noise)")
print(f"- Larger A → stronger true effect → easier to detect → higher power")
print(f"- Smaller sample size (D=100) reduces power compared to D=1000")
print(f"- We need A = {A_options[closest_idx]} to get approximately 50% power")

# Show all results for comparison
print(f"\nSummary of all options:")
for i, (A_val, power) in enumerate(zip(A_options, powers_for_options)):
    print(f"Option {A_option_labels[i]}: A = {A_val:3.1f} → Power = {power:.1%}")

print(f"\nComparison with Question 1:")
print(f"- Question 1: A=1, B=1, C=10, D=1000 → Power ≈ 88%")
print(f"- Question 4: A=?, B=1, C=10, D=100 → Power ≈ 50%")
print(f"- Smaller sample size (100 vs 1000) requires larger effect size for same power")

# Let's also test A=1 with the new sample size for comparison
print(f"\nDirect comparison:")
power_A1_D100 = find_power_for_A(1.0, B=1, C=10, D=100, n_simulations=1000)
power_A1_D1000 = find_power_for_A(1.0, B=1, C=10, D=1000, n_simulations=1000)
print(f"A=1, D=100:  Power = {power_A1_D100:.1%}")
print(f"A=1, D=1000: Power = {power_A1_D1000:.1%}")
print(f"Sample size effect: {power_A1_D1000 - power_A1_D100:.1%} power increase with 10x sample size")

QUESTION 4: FINDING A VALUE FOR 50% POWER
Parameters: B=1, C=10, D=100 (smaller sample size)
Testing the given options:
Option A (4.0): A = 4.0, Power = 97.1%
Option B (0.5): A = 0.5, Power = 8.3%
Option C (2.0): A = 2.0, Power = 51.3%
Option D (1.0): A = 1.0, Power = 18.6%

Closest to 50% power:
Option C (2.0) - Power: 51.3%, Difference from 50%: 0.013

Verification with multiple trials for Option C (2.0):
Trial 1: 54.0%
Trial 2: 52.0%
Trial 3: 54.2%
Trial 4: 52.8%
Trial 5: 49.6%
Mean power across trials: 52.5%

Interpretation:
- A is the true effect size (coefficient of X in Y = A*X - W + noise)
- Larger A → stronger true effect → easier to detect → higher power
- Smaller sample size (D=100) reduces power compared to D=1000
- We need A = 2.0 to get approximately 50% power

Summary of all options:
Option A (4.0): A = 4.0 → Power = 97.1%
Option B (0.5): A = 0.5 → Power = 8.3%
Option C (2.0): A = 2.0 → Power = 51.3%
Option D (1.0): A = 1.0 → Power = 18.6%

Comparison with Question 1:
- 