In [None]:
#Use for Ttest

import pandas as pd
import statsmodels.api as sm

# === Step A: Read dataset ===
file_path = r"C:\Users\300393449\OneDrive - Douglas College\Documents\4th Semester\2_Business_Statistics_II\Python\DataScratch.xlsx"
sheet_name = "Reg_Model"
df = pd.read_excel(file_path, sheet_name=sheet_name)
print("Dataset:")
print(df)

# === Step B: Prepare data and fit model dynamically ===
y = df['y']
X = df.drop(columns=['y'])
X = sm.add_constant(X)
model = sm.OLS(y, X).fit()

print("\n=== Regression Results ===")
print(model.summary())

# Show model equation
coeffs = model.params
equation = "y = " + " + ".join([
    f"{coeffs[i]:.3f}*{i}" if i != 'const' else f"{coeffs[i]:.3f}"
    for i in coeffs.index
])
print(f"\nModel Equation: {equation}")

# Interpret coefficients
print("\nInterpretation of coefficients:")
for var in coeffs.index:
    if var != 'const':
        print(f"- b_{var} ({coeffs[var]:.3f}): For each 1-unit increase in {var}, y changes by {coeffs[var]:.3f} units, holding others constant.")

# === Step C: Gather user input for prediction ===
print("\nEnter values for prediction:")
user_inputs = {}
for var in X.columns:
    if var != 'const':
        user_inputs[var] = float(input(f"Enter {var}: "))

# Ask for confidence level
while True:
    try:
        conf_level = float(input("Enter confidence level (e.g., 0.95 for 95%): "))
        if 0 < conf_level < 1:
            break
        else:
            print("Enter a decimal between 0 and 1.")
    except ValueError:
        print("Enter a valid decimal number.")

# Prepare input DataFrame for prediction
new_data = {'const': 1}
new_data.update(user_inputs)
new_df = pd.DataFrame([new_data])

# Generate prediction with intervals
prediction = model.get_prediction(new_df)
summary = prediction.summary_frame(alpha=1 - conf_level)

# Extract values
mean_pred = summary['mean'][0]                  # Point estimate
mean_ci_lower = summary['mean_ci_lower'][0]    # Lower bound for mean response CI
mean_ci_upper = summary['mean_ci_upper'][0]    # Upper bound for mean response CI
pred_ci_lower = summary['obs_ci_lower'][0]     # Lower bound for prediction interval
pred_ci_upper = summary['obs_ci_upper'][0]     # Upper bound for prediction interval

# Output results
print(f"\nPoint estimate of y (D): {mean_pred:.2f}")

print(f"\n{conf_level*100:.0f}% Confidence interval for the average value of y (B): "
      f"from {mean_ci_lower:.2f} to {mean_ci_upper:.2f}")

print(f"\n{conf_level*100:.0f}% Prediction interval for a particular value of y (A and C): "
      f"from {pred_ci_lower:.2f} to {pred_ci_upper:.2f}")


Dataset:
         y  x1    x2
0   0.2454   1  60.0
1   0.2569   1  60.0
2   0.4984   1  60.0
3   0.2531   2  60.0
4   0.3143   2  60.0
5   0.5316   2  60.0
6   0.5612   3  60.0
7   1.0346   3  60.0
8   1.3985   3  60.0
9   0.2466   1  72.5
10  0.2581   1  72.5
11  0.2566   1  72.5
12  0.2437   2  72.5
13  0.2703   2  72.5
14  0.4845   2  72.5
15  0.3027   3  72.5
16  0.8672   3  72.5
17  1.2242   3  72.5
18  0.2519   1  85.0
19  0.2546   1  85.0
20  0.2496   1  85.0
21  0.2545   2  85.0
22  0.2562   2  85.0
23  0.2904   2  85.0
24  0.2504   3  85.0
25  0.2528   3  85.0
26  0.3007   3  85.0

=== Regression Results ===
                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.432
Model:                            OLS   Adj. R-squared:                  0.385
Method:                 Least Squares   F-statistic:                     9.137
Date:                Thu, 14 Aug 2025   Prob (F-statistic):

if confidence, use average
eg
It can be stated with​ 95% confidence that the average weekly rental rate for a​ four-bedroom house that is 5 years old and one block from the ocean is between the lower and upper limits of the​ 95% confidence interval. 

NOT

The average weekly rental rate for a​ four-bedroom house that is 5 years old and one block from the ocean is between the lower and upper limits of the​ 95% confidence interval​ 95% of the time. 

 
if prediction, use particular (not estimate)
eg
It can be stated with​ 95% confidence that the weekly rental rate for a particular​ four-bedroom house that is 5 years old and one block from the ocean is between the lower and upper limits of the​ 95% prediction interval.

NOT

The weekly rental rate for a particular​ four-bedroom house that is 5 years old and one block from the ocean is between the lower and upper limits of the​ 95% confidence interval​ 95% of the time.

INTERVAL DIFF EXPLANATION
The prediction interval in part e is 
wider
 than the confidence interval in part​ d, because the variation for estimating a single value is much 
greater than
 the variation for estimating an average value.

In [2]:
import pandas as pd
import statsmodels.api as sm

# === Step 1: Load dataset ===
file_path = r"C:\Users\300393449\OneDrive - Douglas College\Documents\4th Semester\2_Business_Statistics_II\Python\DataScratch.xlsx"
sheet_name = "Reg_Model"

df = pd.read_excel(file_path, sheet_name=sheet_name)
print("Dataset:")
print(df)

# === Step 2: Build model with all predictors ===
y = df['y']
X = df.drop(columns=['y'])
X = sm.add_constant(X)
model = sm.OLS(y, X).fit()

print("\n=== Regression Summary ===")
print(model.summary())
coeffs = model.params

# === Step 3: Interpretation loop ===
while True:
    print("\nAvailable predictors:", [col for col in X.columns if col != 'const'])
    var_choice = input("Which variable do you want to interpret? ").strip()

    if var_choice not in X.columns or var_choice == 'const':
        print("Invalid choice. Please pick a valid predictor.")
        continue

    # Interpret chosen predictor
    b_value = coeffs[var_choice]
    print(f"\nInterpretation: For each 1-unit increase in {var_choice}, "
          f"y changes by {b_value:.3f} units, holding other variables constant.")

    # Gather full predictor values for prediction
    user_inputs = {}
    for col in X.columns:
        if col != 'const':
            val = float(input(f"Enter value for {col}: "))
            user_inputs[col] = val

    # Confidence level
    while True:
        try:
            conf_level = float(input("Enter confidence level (e.g., 0.95 for 95%): "))
            if 0 < conf_level < 1:
                break
            else:
                print("Please enter a decimal between 0 and 1.")
        except ValueError:
            print("Invalid input. Try again.")

    # Prepare prediction
    new_data = {'const': 1}
    new_data.update(user_inputs)
    new_df = pd.DataFrame([new_data])

    pred_frame = model.get_prediction(new_df).summary_frame(alpha=1 - conf_level)

    # Extract results
    mean_pred = round(pred_frame['mean'][0], 2)
    mean_ci_lower = round(pred_frame['mean_ci_lower'][0], 2)
    mean_ci_upper = round(pred_frame['mean_ci_upper'][0], 2)
    pred_ci_lower = round(pred_frame['obs_ci_lower'][0], 2)
    pred_ci_upper = round(pred_frame['obs_ci_upper'][0], 2)

    # Output results
    print(f"\nPrediction for given inputs: {mean_pred}")
    print(f"{conf_level*100:.0f}% Confidence Interval (average value of y): {mean_ci_lower} to {mean_ci_upper}")
    print(f"{conf_level*100:.0f}% Prediction Interval (individual value of y): {pred_ci_lower} to {pred_ci_upper}")

    # Interpret intervals
    print(f"\nMeaning of CI: It can be stated with {int(conf_level*100)}% confidence that "
          f"the average value of y for {var_choice} = {user_inputs[var_choice]} "
          f"and the given other predictors is between {mean_ci_lower} and {mean_ci_upper}.")

    print(f"Meaning of PI: For an individual case with {var_choice} = {user_inputs[var_choice]} "
          f"and the given other predictors, we expect y to fall between {pred_ci_lower} and {pred_ci_upper} "
          f"{int(conf_level*100)}% of the time.")

    # Ask if another variable should be interpreted
    next_choice = input("\nDo you want to interpret another predictor? (yes/no): ").strip().lower()
    if next_choice != "yes":
        print("\nDone.")
        break


Dataset:
        y      x1  x2  x3
0    1910  112743   2  18
1   13447  974845   8   3
2    1937  219703   0  13
3    3760  258047   6   7
4     267   37139   1  16
..    ...     ...  ..  ..
57   1195  201997   2   8
58   6531  152953   0  13
59     75    6475   0   2
60    470   27013   2  13
61   1111   51844   0   5

[62 rows x 4 columns]

=== Regression Summary ===
                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.588
Model:                            OLS   Adj. R-squared:                  0.567
Method:                 Least Squares   F-statistic:                     27.60
Date:                Thu, 14 Aug 2025   Prob (F-statistic):           3.22e-11
Time:                        11:33:08   Log-Likelihood:                -709.62
No. Observations:                  62   AIC:                             1427.
Df Residuals:                      58   BIC:                             14

Which variable do you want to interpret?  x1



Interpretation: For each 1-unit increase in x1, y changes by 0.013 units, holding other variables constant.


Enter value for x1:  4
Enter value for x2:  4
Enter value for x3:  4
Enter confidence level (e.g., 0.95 for 95%):  .05



Prediction for given inputs: -1645.93
5% Confidence Interval (average value of y): -1902.17 to -1389.7
5% Prediction Interval (individual value of y): -3140.94 to -150.92

Meaning of CI: It can be stated with 5% confidence that the average value of y for x1 = 4.0 and the given other predictors is between -1902.17 and -1389.7.
Meaning of PI: For an individual case with x1 = 4.0 and the given other predictors, we expect y to fall between -3140.94 and -150.92 5% of the time.



Do you want to interpret another predictor? (yes/no):  no



Done.
